From 7ab83427af0f77b59941ceba41d509d7d097b065 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 10 Jun 2017 13:44:06 +0000 Subject: [PATCH] Vendor import of llvm trunk r305145: https://llvm.org/svn/llvm-project/llvm/trunk@305145 --- CMakeLists.txt | 6 +- bindings/go/llvm/ir.go | 6 + bindings/go/llvm/ir_test.go | 26 + bindings/ocaml/llvm/llvm.ml | 2 + bindings/ocaml/llvm/llvm.mli | 3 + bindings/ocaml/llvm/llvm_ocaml.c | 14 + cmake/modules/HandleLLVMOptions.cmake | 7 + cmake/modules/TableGen.cmake | 50 +- docs/AMDGPUUsage.rst | 3558 ++++++++++++++++- docs/CodeGenerator.rst | 59 +- docs/CompilerWriterInfo.rst | 11 +- docs/LangRef.rst | 33 +- docs/ReleaseNotes.rst | 11 + docs/index.rst | 4 +- examples/ExceptionDemo/ExceptionDemo.cpp | 4 +- include/llvm-c/Core.h | 14 + include/llvm-c/ExecutionEngine.h | 2 +- include/llvm-c/Support.h | 2 +- include/llvm-c/TargetMachine.h | 2 +- include/llvm/ADT/APInt.h | 5 + include/llvm/ADT/GraphTraits.h | 1 - include/llvm/ADT/ImmutableSet.h | 6 +- include/llvm/ADT/PointerUnion.h | 2 +- include/llvm/ADT/PostOrderIterator.h | 2 +- include/llvm/ADT/PriorityWorklist.h | 2 +- include/llvm/ADT/SCCIterator.h | 10 - include/llvm/ADT/SmallPtrSet.h | 6 +- include/llvm/ADT/SmallVector.h | 21 +- include/llvm/ADT/SparseMultiSet.h | 2 +- include/llvm/ADT/StringExtras.h | 2 +- include/llvm/ADT/StringRef.h | 2 +- include/llvm/ADT/iterator_range.h | 2 +- include/llvm/Analysis/AliasAnalysis.h | 4 +- include/llvm/Analysis/AssumptionCache.h | 2 +- include/llvm/Analysis/BranchProbabilityInfo.h | 11 +- include/llvm/Analysis/ConstantFolding.h | 6 +- include/llvm/Analysis/DemandedBits.h | 2 +- include/llvm/Analysis/InlineCost.h | 2 +- include/llvm/Analysis/InstructionSimplify.h | 6 +- .../llvm/Analysis/LazyBranchProbabilityInfo.h | 9 +- include/llvm/Analysis/LazyValueInfo.h | 7 +- include/llvm/Analysis/LoopInfoImpl.h | 2 +- .../llvm/Analysis/MemoryDependenceAnalysis.h | 2 +- include/llvm/Analysis/MemorySSAUpdater.h | 2 +- include/llvm/Analysis/ObjCARCAnalysisUtils.h | 2 +- include/llvm/Analysis/ObjCARCInstKind.h | 2 +- .../Analysis/ScalarEvolutionNormalization.h | 2 +- include/llvm/Analysis/TargetLibraryInfo.h | 8 + include/llvm/Analysis/TargetTransformInfo.h | 31 + .../llvm/Analysis/TargetTransformInfoImpl.h | 18 +- include/llvm/BinaryFormat/COFF.h | 713 ++++ .../llvm/{Support => BinaryFormat}/Dwarf.def | 0 .../llvm/{Support => BinaryFormat}/Dwarf.h | 58 +- include/llvm/{Support => BinaryFormat}/ELF.h | 23 +- .../ELFRelocs/AArch64.def | 0 .../ELFRelocs/AMDGPU.def | 0 .../ELFRelocs/ARM.def | 0 .../ELFRelocs/AVR.def | 0 .../ELFRelocs/BPF.def | 0 .../ELFRelocs/Hexagon.def | 0 .../ELFRelocs/Lanai.def | 0 .../ELFRelocs/Mips.def | 0 .../ELFRelocs/PowerPC.def | 0 .../ELFRelocs/PowerPC64.def | 0 .../ELFRelocs/RISCV.def | 0 .../ELFRelocs/Sparc.def | 0 .../ELFRelocs/SystemZ.def | 0 .../ELFRelocs/WebAssembly.def | 0 .../ELFRelocs/i386.def | 0 .../ELFRelocs/x86_64.def | 0 .../llvm/{Support => BinaryFormat}/MachO.def | 0 include/llvm/BinaryFormat/MachO.h | 1984 +++++++++ include/llvm/BinaryFormat/Magic.h | 73 + include/llvm/{Support => BinaryFormat}/Wasm.h | 46 +- .../WasmRelocs/WebAssembly.def | 0 include/llvm/Bitcode/BitcodeReader.h | 13 +- include/llvm/Bitcode/LLVMBitCodes.h | 2 + include/llvm/CodeGen/BasicTTIImpl.h | 76 +- include/llvm/CodeGen/DFAPacketizer.h | 37 +- include/llvm/CodeGen/DIE.h | 2 +- include/llvm/CodeGen/ExecutionDepsFix.h | 42 +- include/llvm/CodeGen/FastISel.h | 39 +- include/llvm/CodeGen/FunctionLoweringInfo.h | 33 +- include/llvm/CodeGen/GCMetadata.h | 38 +- include/llvm/CodeGen/GCMetadataPrinter.h | 25 +- include/llvm/CodeGen/GCStrategy.h | 2 +- .../CodeGen/GlobalISel/InstructionSelector.h | 2 +- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 2 +- include/llvm/CodeGen/LexicalScopes.h | 3 +- include/llvm/CodeGen/LiveInterval.h | 4 +- include/llvm/CodeGen/LiveRegUnits.h | 2 +- include/llvm/CodeGen/MIRParser/MIRParser.h | 20 +- include/llvm/CodeGen/MIRYamlMapping.h | 140 +- include/llvm/CodeGen/MachineBasicBlock.h | 4 +- include/llvm/CodeGen/MachineFunction.h | 4 +- .../llvm/CodeGen/MachineFunctionInitializer.h | 38 - include/llvm/CodeGen/MachineFunctionPass.h | 2 +- include/llvm/CodeGen/MachineMemOperand.h | 2 +- include/llvm/CodeGen/MachineModuleInfo.h | 15 +- include/llvm/CodeGen/MachineModuleInfoImpls.h | 4 +- include/llvm/CodeGen/MachineOperand.h | 2 +- include/llvm/CodeGen/MachineRegisterInfo.h | 4 +- include/llvm/CodeGen/RegAllocRegistry.h | 17 +- include/llvm/CodeGen/RegisterPressure.h | 10 +- include/llvm/CodeGen/RegisterUsageInfo.h | 17 +- include/llvm/CodeGen/ScheduleDAG.h | 2 +- include/llvm/CodeGen/ScheduleDAGInstrs.h | 2 +- include/llvm/CodeGen/ScheduleDFS.h | 2 +- include/llvm/CodeGen/SchedulerRegistry.h | 17 +- include/llvm/CodeGen/SelectionDAG.h | 39 +- include/llvm/CodeGen/SelectionDAGNodes.h | 4 +- include/llvm/CodeGen/SlotIndexes.h | 2 +- include/llvm/CodeGen/StackProtector.h | 19 +- include/llvm/CodeGen/TailDuplicator.h | 37 +- .../CodeGen/TargetLoweringObjectFileImpl.h | 8 +- include/llvm/CodeGen/TargetPassConfig.h | 11 + include/llvm/CodeGen/TargetSchedule.h | 3 +- include/llvm/CodeGen/VirtRegMap.h | 9 +- include/llvm/Config/abi-breaking.h.cmake | 3 + include/llvm/Config/config.h.cmake | 21 - include/llvm/DebugInfo/CodeView/CVRecord.h | 6 +- include/llvm/DebugInfo/CodeView/CodeView.h | 18 + .../CodeView/DebugChecksumsSubsection.h | 4 +- .../CodeView/DebugCrossExSubsection.h | 64 + .../CodeView/DebugCrossImpSubsection.h | 88 + .../CodeView/DebugInlineeLinesSubsection.h | 7 +- .../DebugInfo/CodeView/DebugLinesSubsection.h | 10 +- .../CodeView/DebugStringTableSubsection.h | 3 + .../CodeView/DebugSubsectionRecord.h | 14 +- .../CodeView/DebugSubsectionVisitor.h | 132 +- .../CodeView/DebugSymbolRVASubsection.h | 59 + .../CodeView/DebugSymbolsSubsection.h | 3 + include/llvm/DebugInfo/CodeView/EnumTables.h | 2 +- .../llvm/DebugInfo/CodeView/TypeSerializer.h | 2 + .../DebugInfo/CodeView/TypeTableBuilder.h | 5 +- include/llvm/DebugInfo/DIContext.h | 2 + .../DWARF/DWARFAbbreviationDeclaration.h | 4 +- .../DebugInfo/DWARF/DWARFAcceleratorTable.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFAttribute.h | 2 +- .../llvm/DebugInfo/DWARF/DWARFCompileUnit.h | 6 +- include/llvm/DebugInfo/DWARF/DWARFContext.h | 35 +- .../DebugInfo/DWARF/DWARFDebugInfoEntry.h | 2 +- .../llvm/DebugInfo/DWARF/DWARFDebugPubTable.h | 2 +- .../DebugInfo/DWARF/DWARFDebugRangeList.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFDie.h | 7 +- include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 2 +- include/llvm/DebugInfo/DWARF/DWARFUnit.h | 32 +- .../llvm/DebugInfo/MSF/MappedBlockStream.h | 1 - .../DebugInfo/PDB/DIA/DIAEnumDebugStreams.h | 1 + .../DebugInfo/PDB/DIA/DIAEnumLineNumbers.h | 1 + .../DebugInfo/PDB/DIA/DIAEnumSourceFiles.h | 1 + .../llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h | 1 + .../PDB/Native/DbiModuleDescriptor.h | 5 +- include/llvm/DebugInfo/PDB/Native/DbiStream.h | 3 +- include/llvm/DebugInfo/PDB/PDBSymbol.h | 4 + .../llvm/ExecutionEngine/ExecutionEngine.h | 2 +- .../Orc/CompileOnDemandLayer.h | 2 +- .../llvm/ExecutionEngine/Orc/ExecutionUtils.h | 2 +- .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 2 +- .../Orc/RTDyldObjectLinkingLayer.h | 2 +- .../ExecutionEngine/RTDyldMemoryManager.h | 2 +- include/llvm/IR/Attributes.h | 4 +- include/llvm/IR/BasicBlock.h | 4 +- include/llvm/IR/CallSite.h | 4 +- include/llvm/IR/Constants.h | 6 +- include/llvm/IR/DataLayout.h | 2 +- include/llvm/IR/DebugInfoMetadata.h | 11 +- include/llvm/IR/DiagnosticInfo.h | 4 +- include/llvm/IR/Dominators.h | 6 + include/llvm/IR/Function.h | 4 +- include/llvm/IR/GetElementPtrTypeIterator.h | 4 +- include/llvm/IR/GlobalValue.h | 2 +- include/llvm/IR/GlobalVariable.h | 9 +- include/llvm/IR/IRBuilder.h | 26 +- include/llvm/IR/InstrTypes.h | 2 +- include/llvm/IR/Instruction.h | 8 +- include/llvm/IR/Instructions.h | 101 +- include/llvm/IR/IntrinsicsAMDGPU.td | 10 + include/llvm/IR/Metadata.h | 8 +- include/llvm/IR/Module.h | 4 +- include/llvm/IR/ModuleSummaryIndex.h | 4 +- include/llvm/IR/OperandTraits.h | 3 - include/llvm/IR/PatternMatch.h | 2 +- include/llvm/IR/Statepoint.h | 2 +- include/llvm/IR/Type.h | 2 +- include/llvm/IR/Use.h | 2 +- include/llvm/IR/Value.h | 2 +- include/llvm/LTO/LTO.h | 9 +- include/llvm/LinkAllIR.h | 2 +- include/llvm/LinkAllPasses.h | 4 +- include/llvm/MC/MCAsmInfo.h | 8 +- include/llvm/MC/MCAssembler.h | 6 +- include/llvm/MC/MCCodeView.h | 4 +- include/llvm/MC/MCContext.h | 2 +- include/llvm/MC/MCELFObjectWriter.h | 2 +- include/llvm/MC/MCFragment.h | 2 +- include/llvm/MC/MCMachObjectWriter.h | 4 +- include/llvm/MC/MCObjectFileInfo.h | 4 + include/llvm/MC/MCParser/MCAsmParser.h | 2 +- include/llvm/MC/MCSection.h | 2 +- include/llvm/MC/MCSectionMachO.h | 2 +- include/llvm/MC/MCSymbolWasm.h | 2 +- include/llvm/MC/MCTargetOptions.h | 6 + include/llvm/MC/MCWasmObjectWriter.h | 30 +- include/llvm/Object/Archive.h | 2 +- include/llvm/Object/COFF.h | 9 +- include/llvm/Object/COFFModuleDefinition.h | 2 +- include/llvm/Object/Decompressor.h | 2 +- include/llvm/Object/ELF.h | 2 +- include/llvm/Object/ELFObjectFile.h | 2 +- include/llvm/Object/ELFTypes.h | 2 +- include/llvm/Object/IRObjectFile.h | 15 + include/llvm/Object/IRSymtab.h | 15 +- include/llvm/Object/MachO.h | 6 +- include/llvm/Object/MachOUniversal.h | 2 +- include/llvm/Object/ObjectFile.h | 10 +- include/llvm/Object/RelocVisitor.h | 4 +- include/llvm/Object/SymbolicFile.h | 7 +- include/llvm/Object/Wasm.h | 2 +- include/llvm/Object/WindowsResource.h | 74 +- include/llvm/ObjectYAML/COFFYAML.h | 2 +- .../ObjectYAML/CodeViewYAMLDebugSections.h | 32 +- include/llvm/ObjectYAML/CodeViewYAMLTypes.h | 5 + include/llvm/ObjectYAML/DWARFYAML.h | 14 +- include/llvm/ObjectYAML/ELFYAML.h | 2 +- include/llvm/ObjectYAML/MachOYAML.h | 8 +- include/llvm/ObjectYAML/WasmYAML.h | 2 +- include/llvm/Option/ArgList.h | 2 +- include/llvm/Pass.h | 2 +- .../ProfileData/Coverage/CoverageMapping.h | 6 +- .../llvm/Support}/AMDGPUCodeObjectMetadata.h | 6 +- include/llvm/Support/BinaryStreamArray.h | 262 +- include/llvm/Support/BinaryStreamReader.h | 20 +- include/llvm/Support/CBindingWrapping.h | 2 +- include/llvm/Support/COFF.h | 724 ---- include/llvm/Support/Casting.h | 45 +- include/llvm/Support/CommandLine.h | 77 +- include/llvm/Support/ConvertUTF.h | 2 +- include/llvm/Support/DataTypes.h.cmake | 4 +- include/llvm/Support/Endian.h | 150 +- include/llvm/Support/Error.h | 31 +- include/llvm/Support/ErrorOr.h | 18 +- include/llvm/Support/FileSystem.h | 55 - include/llvm/Support/FormatVariadic.h | 2 +- include/llvm/Support/GCOV.h | 4 +- include/llvm/Support/GenericDomTree.h | 2 +- include/llvm/Support/LowLevelTypeImpl.h | 2 +- include/llvm/Support/MachO.h | 2038 ---------- include/llvm/Support/MathExtras.h | 2 +- include/llvm/Support/MemoryBuffer.h | 4 +- include/llvm/Support/Solaris.h | 2 +- include/llvm/Support/SourceMgr.h | 2 +- include/llvm/Support/StringPool.h | 24 +- include/llvm/Support/TargetRegistry.h | 4 +- include/llvm/Support/raw_sha1_ostream.h | 4 +- include/llvm/Support/type_traits.h | 20 +- include/llvm/Target/TargetInstrInfo.h | 2 +- include/llvm/Target/TargetLowering.h | 69 + include/llvm/Target/TargetMachine.h | 12 +- include/llvm/Target/TargetOptions.h | 10 +- include/llvm/Target/TargetSubtargetInfo.h | 2 +- include/llvm/Transforms/IPO/FunctionAttrs.h | 2 +- .../llvm/Transforms/Scalar/GVNExpression.h | 13 + .../llvm/Transforms/Utils/EscapeEnumerator.h | 2 +- .../Transforms/Utils/FunctionComparator.h | 2 +- .../ImportedFunctionsInliningStatistics.h | 2 +- include/llvm/Transforms/Utils/Local.h | 2 +- .../llvm/Transforms/Utils/LoopVersioning.h | 2 +- .../Transforms/Utils/OrderedInstructions.h | 51 + include/llvm/Transforms/Utils/ValueMapper.h | 2 +- .../llvm/Transforms/Vectorize/SLPVectorizer.h | 2 +- include/llvm/module.modulemap | 44 +- lib/Analysis/AliasAnalysisEvaluator.cpp | 2 +- lib/Analysis/AliasSetTracker.cpp | 2 +- lib/Analysis/BranchProbabilityInfo.cpp | 47 +- lib/Analysis/CFLGraph.h | 1 - lib/Analysis/CallPrinter.cpp | 2 +- lib/Analysis/CaptureTracking.cpp | 2 +- lib/Analysis/CodeMetrics.cpp | 2 +- lib/Analysis/ConstantFolding.cpp | 42 +- lib/Analysis/GlobalsModRef.cpp | 16 +- lib/Analysis/InlineCost.cpp | 4 +- lib/Analysis/InstCount.cpp | 3 +- lib/Analysis/InstructionSimplify.cpp | 70 +- lib/Analysis/LLVMBuild.txt | 2 +- lib/Analysis/LazyBranchProbabilityInfo.cpp | 8 +- lib/Analysis/LazyCallGraph.cpp | 3 +- lib/Analysis/LazyValueInfo.cpp | 194 +- lib/Analysis/Lint.cpp | 2 +- lib/Analysis/MemDepPrinter.cpp | 2 +- lib/Analysis/MemDerefPrinter.cpp | 4 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 6 +- lib/Analysis/MemorySSAUpdater.cpp | 19 +- lib/Analysis/ModuleDebugInfoPrinter.cpp | 2 +- lib/Analysis/ModuleSummaryAnalysis.cpp | 10 + lib/Analysis/ObjCARCInstKind.cpp | 2 +- lib/Analysis/RegionPrinter.cpp | 4 +- lib/Analysis/ScalarEvolution.cpp | 4 +- lib/Analysis/ScalarEvolutionNormalization.cpp | 2 +- lib/Analysis/TargetTransformInfo.cpp | 8 + lib/Analysis/ValueTracking.cpp | 20 +- lib/Analysis/VectorUtils.cpp | 8 +- lib/AsmParser/LLParser.cpp | 4 +- lib/AsmParser/LLVMBuild.txt | 2 +- lib/BinaryFormat/CMakeLists.txt | 8 + lib/{Support => BinaryFormat}/Dwarf.cpp | 200 +- lib/BinaryFormat/LLVMBuild.txt | 22 + lib/BinaryFormat/Magic.cpp | 216 + lib/Bitcode/Reader/BitcodeReader.cpp | 26 +- lib/Bitcode/Reader/MetadataLoader.cpp | 2 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 10 +- lib/CMakeLists.txt | 1 + lib/CodeGen/Analysis.cpp | 2 +- lib/CodeGen/AsmPrinter/ARMException.cpp | 2 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 10 +- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 2 +- .../AsmPrinter/AsmPrinterInlineAsm.cpp | 2 +- lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 72 +- lib/CodeGen/AsmPrinter/CodeViewDebug.h | 51 +- lib/CodeGen/AsmPrinter/DIEHash.cpp | 4 +- lib/CodeGen/AsmPrinter/DebugLocStream.h | 2 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 2 +- lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 2 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 4 +- lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 4 +- lib/CodeGen/AsmPrinter/LLVMBuild.txt | 2 +- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 23 +- lib/CodeGen/AsmPrinter/WinException.cpp | 4 +- lib/CodeGen/BasicTargetTransformInfo.cpp | 2 +- lib/CodeGen/BranchFolding.cpp | 36 +- lib/CodeGen/BranchRelaxation.cpp | 6 +- lib/CodeGen/BuiltinGCs.cpp | 2 +- lib/CodeGen/CalcSpillWeights.cpp | 2 +- lib/CodeGen/CodeGen.cpp | 2 +- lib/CodeGen/CodeGenPrepare.cpp | 302 +- lib/CodeGen/DFAPacketizer.cpp | 85 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 2 +- lib/CodeGen/DwarfEHPrepare.cpp | 4 +- lib/CodeGen/ExpandISelPseudos.cpp | 2 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 2 +- lib/CodeGen/ExpandReductions.cpp | 6 +- lib/CodeGen/FaultMaps.cpp | 2 +- lib/CodeGen/FuncletLayout.cpp | 2 +- lib/CodeGen/GCMetadata.cpp | 16 +- lib/CodeGen/GCMetadataPrinter.cpp | 7 +- lib/CodeGen/GlobalISel/IRTranslator.cpp | 19 +- lib/CodeGen/GlobalISel/Legalizer.cpp | 1 - lib/CodeGen/GlobalMerge.cpp | 3 +- lib/CodeGen/IfConversion.cpp | 2 +- lib/CodeGen/ImplicitNullChecks.cpp | 10 +- lib/CodeGen/InlineSpiller.cpp | 31 +- lib/CodeGen/LLVMTargetMachine.cpp | 141 +- lib/CodeGen/LexicalScopes.cpp | 2 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 4 +- lib/CodeGen/LiveIntervalUnion.cpp | 6 +- lib/CodeGen/LiveRegMatrix.cpp | 6 +- lib/CodeGen/LocalStackSlotAllocation.cpp | 2 +- lib/CodeGen/MIRParser/MILexer.h | 2 +- lib/CodeGen/MIRParser/MIParser.cpp | 44 +- lib/CodeGen/MIRParser/MIParser.h | 16 +- lib/CodeGen/MIRParser/MIRParser.cpp | 144 +- lib/CodeGen/MIRPrinter.cpp | 65 +- lib/CodeGen/MIRPrintingPass.cpp | 4 +- lib/CodeGen/MachineBlockPlacement.cpp | 4 +- lib/CodeGen/MachineCSE.cpp | 2 +- lib/CodeGen/MachineCopyPropagation.cpp | 2 +- lib/CodeGen/MachineDominanceFrontier.cpp | 1 - lib/CodeGen/MachineDominators.cpp | 2 +- lib/CodeGen/MachineFunction.cpp | 3 - lib/CodeGen/MachineFunctionPass.cpp | 2 +- lib/CodeGen/MachineFunctionPrinterPass.cpp | 2 +- lib/CodeGen/MachineInstr.cpp | 6 +- lib/CodeGen/MachineLICM.cpp | 2 +- lib/CodeGen/MachineModuleInfo.cpp | 16 +- lib/CodeGen/MachineOutliner.cpp | 6 +- lib/CodeGen/MachinePipeliner.cpp | 2 +- lib/CodeGen/MachineRegionInfo.cpp | 2 +- lib/CodeGen/MachineRegisterInfo.cpp | 2 +- lib/CodeGen/MachineScheduler.cpp | 8 +- lib/CodeGen/MachineSink.cpp | 2 +- lib/CodeGen/MachineTraceMetrics.cpp | 2 +- lib/CodeGen/MachineVerifier.cpp | 11 +- lib/CodeGen/OptimizePHIs.cpp | 2 +- lib/CodeGen/PatchableFunction.cpp | 2 +- lib/CodeGen/PeepholeOptimizer.cpp | 2 +- lib/CodeGen/PostRAHazardRecognizer.cpp | 2 +- lib/CodeGen/RegAllocBase.cpp | 5 +- lib/CodeGen/RegAllocBasic.cpp | 2 +- lib/CodeGen/RegAllocGreedy.cpp | 96 +- lib/CodeGen/RegAllocPBQP.cpp | 16 +- lib/CodeGen/RegisterClassInfo.cpp | 2 +- lib/CodeGen/RegisterPressure.cpp | 4 +- lib/CodeGen/RegisterUsageInfo.cpp | 15 +- lib/CodeGen/RenameIndependentSubregs.cpp | 9 +- lib/CodeGen/ResetMachineFunctionPass.cpp | 2 +- lib/CodeGen/ScheduleDAG.cpp | 8 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 6 +- lib/CodeGen/ScheduleDAGPrinter.cpp | 2 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 32 +- lib/CodeGen/SelectionDAG/FastISel.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 8 + lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +- .../SelectionDAG/ScheduleDAGRRList.cpp | 2 +- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 2 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 249 +- .../SelectionDAG/SelectionDAGBuilder.h | 14 +- .../SelectionDAG/SelectionDAGDumper.cpp | 2 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 28 +- .../SelectionDAG/SelectionDAGPrinter.cpp | 2 +- .../SelectionDAG/StatepointLowering.cpp | 4 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 +- lib/CodeGen/ShadowStackGCLowering.cpp | 2 +- lib/CodeGen/SjLjEHPrepare.cpp | 2 +- lib/CodeGen/StackMaps.cpp | 2 +- lib/CodeGen/StackProtector.cpp | 8 + lib/CodeGen/StackSlotColoring.cpp | 2 +- lib/CodeGen/TailDuplication.cpp | 14 +- lib/CodeGen/TailDuplicator.cpp | 38 +- lib/CodeGen/TargetFrameLoweringImpl.cpp | 14 +- lib/CodeGen/TargetLoweringBase.cpp | 8 +- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 122 +- lib/CodeGen/TargetOptionsImpl.cpp | 4 +- lib/CodeGen/TargetPassConfig.cpp | 71 + lib/CodeGen/TargetRegisterInfo.cpp | 2 +- lib/CodeGen/TargetSchedule.cpp | 24 +- lib/CodeGen/TargetSubtargetInfo.cpp | 2 +- lib/CodeGen/VirtRegMap.cpp | 11 + lib/CodeGen/WinEHPrepare.cpp | 2 +- lib/CodeGen/XRayInstrumentation.cpp | 30 +- lib/DebugInfo/CodeView/CMakeLists.txt | 11 +- .../CodeView/DebugChecksumsSubsection.cpp | 4 +- .../CodeView/DebugCrossExSubsection.cpp | 51 + .../CodeView/DebugCrossImpSubsection.cpp | 91 + .../CodeView/DebugInlineeLinesSubsection.cpp | 9 +- .../CodeView/DebugLinesSubsection.cpp | 11 +- .../CodeView/DebugStringTableSubsection.cpp | 14 +- .../CodeView/DebugSubsectionRecord.cpp | 28 +- .../CodeView/DebugSubsectionVisitor.cpp | 84 +- .../CodeView/DebugSymbolRVASubsection.cpp | 31 + .../CodeView/TypeTableCollection.cpp | 3 +- .../DWARF/DWARFAbbreviationDeclaration.cpp | 5 +- lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 5 +- lib/DebugInfo/DWARF/DWARFCompileUnit.cpp | 4 +- lib/DebugInfo/DWARF/DWARFContext.cpp | 149 +- lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 8 +- lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 2 +- lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 2 +- lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 6 +- lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 4 +- lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp | 4 +- lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp | 2 +- lib/DebugInfo/DWARF/DWARFDie.cpp | 41 +- lib/DebugInfo/DWARF/DWARFFormValue.cpp | 10 +- lib/DebugInfo/DWARF/DWARFGdbIndex.cpp | 2 +- lib/DebugInfo/DWARF/DWARFTypeUnit.cpp | 2 +- lib/DebugInfo/DWARF/DWARFUnit.cpp | 60 +- lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 4 +- lib/DebugInfo/DWARF/LLVMBuild.txt | 2 +- lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp | 4 +- lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp | 2 +- lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp | 2 +- lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp | 2 +- lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp | 7 +- .../PDB/Native/DbiModuleDescriptorBuilder.cpp | 16 +- lib/DebugInfo/PDB/Native/DbiStream.cpp | 6 +- lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 2 +- .../PDB/Native/ModuleDebugStream.cpp | 4 +- lib/DebugInfo/PDB/Native/PublicsStream.cpp | 10 +- lib/DebugInfo/PDB/PDBContext.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolBlock.cpp | 2 +- .../PDB/PDBSymbolCompilandDetails.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolCustom.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolFunc.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp | 2 +- .../PDB/PDBSymbolTypeFunctionSig.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp | 2 +- .../PDB/PDBSymbolTypeVTableShape.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolUnknown.cpp | 2 +- lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp | 2 +- .../Symbolize/SymbolizableObjectFile.cpp | 2 +- lib/DebugInfo/Symbolize/Symbolize.cpp | 4 +- .../IntelJITEvents/IntelJITEventListener.cpp | 2 +- .../IntelJITEvents/jitprofiling.c | 2 +- .../Interpreter/ExternalFunctions.cpp | 4 +- .../OProfileJIT/OProfileJITEventListener.cpp | 2 +- lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 2 +- lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 4 +- .../RuntimeDyld/RuntimeDyld.cpp | 4 +- .../RuntimeDyld/RuntimeDyldELF.cpp | 2 +- .../RuntimeDyld/RuntimeDyldImpl.h | 2 +- .../RuntimeDyld/Targets/RuntimeDyldCOFFI386.h | 4 +- .../Targets/RuntimeDyldCOFFThumb.h | 4 +- .../Targets/RuntimeDyldCOFFX86_64.h | 4 +- .../Targets/RuntimeDyldELFMips.cpp | 2 +- lib/ExecutionEngine/SectionMemoryManager.cpp | 2 +- lib/Fuzzer/FuzzerDriver.cpp | 4 +- lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp | 2 + lib/Fuzzer/FuzzerLoop.cpp | 2 +- lib/Fuzzer/FuzzerMerge.cpp | 4 +- lib/Fuzzer/FuzzerMutate.cpp | 2 +- lib/Fuzzer/FuzzerShmemPosix.cpp | 6 +- lib/Fuzzer/FuzzerShmemWindows.cpp | 4 +- lib/Fuzzer/FuzzerTracePC.cpp | 2 +- lib/Fuzzer/FuzzerTraceState.cpp | 2 +- lib/Fuzzer/FuzzerUtilWindows.cpp | 2 + lib/Fuzzer/afl/afl_driver.cpp | 14 +- lib/Fuzzer/test/AFLDriverTest.cpp | 2 +- lib/Fuzzer/test/AbsNegAndConstant64Test.cpp | 6 +- lib/Fuzzer/test/AbsNegAndConstantTest.cpp | 6 +- lib/Fuzzer/test/AccumulateAllocationsTest.cpp | 2 +- lib/Fuzzer/test/BadStrcmpTest.cpp | 2 +- lib/Fuzzer/test/BufferOverflowOnInput.cpp | 2 +- lib/Fuzzer/test/CallerCalleeTest.cpp | 2 +- lib/Fuzzer/test/CleanseTest.cpp | 2 +- lib/Fuzzer/test/CustomMutatorTest.cpp | 2 +- lib/Fuzzer/test/CxxStringEqTest.cpp | 4 +- lib/Fuzzer/test/DSOTestMain.cpp | 2 +- lib/Fuzzer/test/DivTest.cpp | 2 +- .../test/FourIndependentBranchesTest.cpp | 2 +- lib/Fuzzer/test/FullCoverageSetTest.cpp | 2 +- lib/Fuzzer/test/FuzzerUnittest.cpp | 4 +- lib/Fuzzer/test/LeakTest.cpp | 2 +- lib/Fuzzer/test/LeakTimeoutTest.cpp | 2 +- lib/Fuzzer/test/LoadTest.cpp | 2 +- lib/Fuzzer/test/Memcmp64BytesTest.cpp | 2 +- lib/Fuzzer/test/MemcmpTest.cpp | 2 +- lib/Fuzzer/test/NotinstrumentedTest.cpp | 2 +- lib/Fuzzer/test/NthRunCrashTest.cpp | 2 +- lib/Fuzzer/test/NullDerefOnEmptyTest.cpp | 2 +- lib/Fuzzer/test/NullDerefTest.cpp | 2 +- lib/Fuzzer/test/OneHugeAllocTest.cpp | 2 +- .../test/OutOfMemorySingleLargeMallocTest.cpp | 2 +- lib/Fuzzer/test/OutOfMemoryTest.cpp | 2 +- lib/Fuzzer/test/RepeatedBytesTest.cpp | 2 +- lib/Fuzzer/test/RepeatedMemcmp.cpp | 3 +- lib/Fuzzer/test/ShrinkControlFlowTest.cpp | 6 +- lib/Fuzzer/test/ShrinkValueProfileTest.cpp | 6 +- lib/Fuzzer/test/SignedIntOverflowTest.cpp | 4 +- lib/Fuzzer/test/SimpleCmpTest.cpp | 2 +- lib/Fuzzer/test/SimpleDictionaryTest.cpp | 2 +- lib/Fuzzer/test/SimpleHashTest.cpp | 2 +- lib/Fuzzer/test/SimpleTest.cpp | 2 +- lib/Fuzzer/test/SimpleThreadedTest.cpp | 2 +- lib/Fuzzer/test/SingleByteInputTest.cpp | 4 +- lib/Fuzzer/test/SingleMemcmpTest.cpp | 2 +- lib/Fuzzer/test/SingleStrcmpTest.cpp | 2 +- lib/Fuzzer/test/SingleStrncmpTest.cpp | 2 +- lib/Fuzzer/test/SpamyTest.cpp | 2 +- lib/Fuzzer/test/StrcmpTest.cpp | 4 +- lib/Fuzzer/test/StrncmpOOBTest.cpp | 6 +- lib/Fuzzer/test/StrncmpTest.cpp | 2 +- lib/Fuzzer/test/StrstrTest.cpp | 4 +- lib/Fuzzer/test/SwapCmpTest.cpp | 2 +- lib/Fuzzer/test/Switch2Test.cpp | 8 +- lib/Fuzzer/test/SwitchTest.cpp | 8 +- lib/Fuzzer/test/TableLookupTest.cpp | 4 +- lib/Fuzzer/test/ThreadedLeakTest.cpp | 2 +- lib/Fuzzer/test/ThreadedTest.cpp | 2 +- lib/Fuzzer/test/TimeoutEmptyTest.cpp | 2 +- lib/Fuzzer/test/TimeoutTest.cpp | 2 +- lib/Fuzzer/test/TraceMallocTest.cpp | 2 +- lib/Fuzzer/test/TwoDifferentBugsTest.cpp | 2 +- lib/IR/AsmWriter.cpp | 2 +- lib/IR/Attributes.cpp | 4 +- lib/IR/Comdat.cpp | 2 +- lib/IR/ConstantRange.cpp | 40 +- lib/IR/Constants.cpp | 13 +- lib/IR/Core.cpp | 12 + lib/IR/DIBuilder.cpp | 4 +- lib/IR/DataLayout.cpp | 2 +- lib/IR/DebugInfo.cpp | 2 +- lib/IR/DebugLoc.cpp | 2 +- lib/IR/DiagnosticInfo.cpp | 6 +- lib/IR/DiagnosticPrinter.cpp | 4 +- lib/IR/Dominators.cpp | 20 +- lib/IR/Function.cpp | 4 +- lib/IR/Globals.cpp | 8 +- lib/IR/IRBuilder.cpp | 34 +- lib/IR/InlineAsm.cpp | 2 +- lib/IR/Instruction.cpp | 10 +- lib/IR/Instructions.cpp | 135 +- lib/IR/IntrinsicInst.cpp | 2 +- lib/IR/LLVMBuild.txt | 2 +- lib/IR/LLVMContext.cpp | 2 +- lib/IR/LLVMContextImpl.h | 2 +- lib/IR/LegacyPassManager.cpp | 2 +- lib/IR/Metadata.cpp | 4 +- lib/IR/Module.cpp | 6 +- lib/IR/OptBisect.cpp | 2 +- lib/IR/Type.cpp | 2 +- lib/IR/TypeFinder.cpp | 2 +- lib/IR/ValueSymbolTable.cpp | 2 +- lib/IR/Verifier.cpp | 6 +- lib/LLVMBuild.txt | 1 + lib/LTO/LTO.cpp | 74 +- lib/LTO/ThinLTOCodeGenerator.cpp | 2 +- lib/MC/ELFObjectWriter.cpp | 20 +- lib/MC/MCAsmBackend.cpp | 2 +- lib/MC/MCAsmInfo.cpp | 2 +- lib/MC/MCAsmInfoDarwin.cpp | 2 +- lib/MC/MCAsmInfoELF.cpp | 2 +- lib/MC/MCAssembler.cpp | 4 +- lib/MC/MCCodeView.cpp | 2 +- lib/MC/MCContext.cpp | 8 +- lib/MC/MCDisassembler/Disassembler.cpp | 2 +- lib/MC/MCDisassembler/MCRelocationInfo.cpp | 2 +- lib/MC/MCDwarf.cpp | 6 +- lib/MC/MCELFStreamer.cpp | 6 +- lib/MC/MCExpr.cpp | 6 +- lib/MC/MCFragment.cpp | 4 +- lib/MC/MCInstPrinter.cpp | 2 +- lib/MC/MCInstrAnalysis.cpp | 2 +- lib/MC/MCMachOStreamer.cpp | 2 +- lib/MC/MCNullStreamer.cpp | 2 +- lib/MC/MCObjectFileInfo.cpp | 20 +- lib/MC/MCObjectWriter.cpp | 6 +- lib/MC/MCParser/AsmLexer.cpp | 2 +- lib/MC/MCParser/AsmParser.cpp | 8 +- lib/MC/MCParser/COFFAsmParser.cpp | 4 +- lib/MC/MCParser/DarwinAsmParser.cpp | 6 +- lib/MC/MCParser/ELFAsmParser.cpp | 4 +- lib/MC/MCParser/MCAsmLexer.cpp | 2 +- lib/MC/MCParser/MCAsmParser.cpp | 2 +- lib/MC/MCParser/MCTargetAsmParser.cpp | 2 +- lib/MC/MCRegisterInfo.cpp | 2 +- lib/MC/MCSection.cpp | 2 +- lib/MC/MCSectionCOFF.cpp | 2 +- lib/MC/MCSectionELF.cpp | 4 +- lib/MC/MCStreamer.cpp | 6 +- lib/MC/MCSubtargetInfo.cpp | 2 +- lib/MC/MCSymbol.cpp | 2 +- lib/MC/MCSymbolELF.cpp | 4 +- lib/MC/MCTargetOptions.cpp | 2 +- lib/MC/MCWasmObjectTargetWriter.cpp | 10 +- lib/MC/MCWinEH.cpp | 4 +- lib/MC/MachObjectWriter.cpp | 4 +- lib/MC/StringTableBuilder.cpp | 4 +- lib/MC/SubtargetFeature.cpp | 2 +- lib/MC/WasmObjectWriter.cpp | 271 +- lib/MC/WinCOFFObjectWriter.cpp | 6 +- lib/MC/WinCOFFStreamer.cpp | 4 +- lib/Object/Archive.cpp | 2 +- lib/Object/ArchiveWriter.cpp | 3 +- lib/Object/Binary.cpp | 71 +- lib/Object/COFFImportFile.cpp | 14 +- lib/Object/COFFObjectFile.cpp | 2 +- lib/Object/Decompressor.cpp | 2 +- lib/Object/ELF.cpp | 34 +- lib/Object/ELFObjectFile.cpp | 6 +- lib/Object/IRObjectFile.cpp | 33 +- lib/Object/IRSymtab.cpp | 47 +- lib/Object/LLVMBuild.txt | 2 +- lib/Object/MachOObjectFile.cpp | 12 +- lib/Object/ModuleSymbolTable.cpp | 6 +- lib/Object/Object.cpp | 2 +- lib/Object/ObjectFile.cpp | 61 +- lib/Object/SymbolicFile.cpp | 64 +- lib/Object/WasmObjectFile.cpp | 4 +- lib/Object/WindowsResource.cpp | 518 ++- lib/ObjectYAML/CodeViewYAMLDebugSections.cpp | 551 ++- lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 13 + lib/ObjectYAML/CodeViewYAMLTypes.cpp | 28 +- lib/ObjectYAML/DWARFEmitter.cpp | 2 +- lib/ObjectYAML/DWARFVisitor.h | 2 +- lib/ObjectYAML/ELFYAML.cpp | 26 +- lib/ObjectYAML/MachOYAML.cpp | 4 +- lib/ObjectYAML/ObjectYAML.cpp | 2 +- lib/ObjectYAML/WasmYAML.cpp | 2 +- lib/Option/Arg.cpp | 2 +- lib/Passes/PassBuilder.cpp | 10 + lib/ProfileData/Coverage/CoverageMapping.cpp | 2 +- .../Coverage/CoverageMappingWriter.cpp | 2 +- lib/ProfileData/InstrProf.cpp | 4 +- lib/ProfileData/InstrProfReader.cpp | 2 +- lib/ProfileData/InstrProfWriter.cpp | 2 +- lib/ProfileData/SampleProfWriter.cpp | 2 +- lib/Support/AMDGPUCodeObjectMetadata.cpp | 218 + lib/Support/ARMAttributeParser.cpp | 2 +- lib/Support/ARMBuildAttrs.cpp | 2 +- lib/Support/Atomic.cpp | 2 + lib/Support/CMakeLists.txt | 2 +- lib/Support/CommandLine.cpp | 18 +- lib/Support/ConvertUTF.cpp | 2 - lib/Support/ConvertUTFWrapper.cpp | 2 +- lib/Support/Errno.cpp | 2 +- lib/Support/Error.cpp | 1 - lib/Support/FormattedStream.cpp | 2 +- lib/Support/LockFileManager.cpp | 6 +- lib/Support/MD5.cpp | 2 +- lib/Support/Mutex.cpp | 2 +- lib/Support/Path.cpp | 176 +- lib/Support/PrettyStackTrace.cpp | 2 +- lib/Support/Process.cpp | 2 +- lib/Support/RWMutex.cpp | 2 +- lib/Support/SHA1.cpp | 2 +- lib/Support/Signals.cpp | 18 +- lib/Support/SourceMgr.cpp | 6 +- lib/Support/SpecialCaseList.cpp | 2 +- lib/Support/Statistic.cpp | 2 +- lib/Support/StringExtras.cpp | 2 +- lib/Support/TargetRegistry.cpp | 3 +- lib/Support/ThreadLocal.cpp | 2 +- lib/Support/Timer.cpp | 2 +- lib/Support/TrigramIndex.cpp | 2 +- lib/Support/Triple.cpp | 21 +- lib/Support/Unix/DynamicLibrary.inc | 7 +- lib/Support/Unix/Path.inc | 2 +- lib/Support/Unix/Signals.inc | 2 +- lib/Support/Unix/Threading.inc | 8 +- lib/Support/Windows/DynamicLibrary.inc | 2 +- lib/Support/Windows/WindowsSupport.h | 4 +- lib/Support/YAMLParser.cpp | 2 +- lib/TableGen/StringMatcher.cpp | 2 +- lib/Target/AArch64/AArch64AsmPrinter.cpp | 6 +- .../AArch64DeadRegisterDefinitionsPass.cpp | 2 +- .../AArch64/AArch64ExpandPseudoInsts.cpp | 2 +- lib/Target/AArch64/AArch64FastISel.cpp | 6 +- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 6 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +- lib/Target/AArch64/AArch64LegalizerInfo.cpp | 4 +- .../AArch64/AArch64LoadStoreOptimizer.cpp | 2 +- lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 2 +- .../AArch64/AArch64RegisterBankInfo.cpp | 4 +- .../AArch64/AArch64TargetObjectFile.cpp | 2 +- .../AArch64/AArch64TargetTransformInfo.cpp | 2 +- .../AArch64/AsmParser/AArch64AsmParser.cpp | 2 +- .../MCTargetDesc/AArch64AsmBackend.cpp | 4 +- .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 2 +- .../MCTargetDesc/AArch64ELFStreamer.cpp | 2 +- .../MCTargetDesc/AArch64MachObjectWriter.cpp | 2 +- lib/Target/AMDGPU/AMDGPU.td | 33 +- lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp | 6 +- .../AMDGPU/AMDGPUAnnotateUniformValues.cpp | 2 +- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 12 +- lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 2 +- lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 2 +- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 4 +- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +- lib/Target/AMDGPU/AMDGPUInstrInfo.h | 2 +- lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 2 +- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 1 - .../AMDGPU/AMDGPUMachineCFGStructurizer.cpp | 2 +- lib/Target/AMDGPU/AMDGPUMachineFunction.h | 2 +- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 20 +- lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp | 353 ++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 1 - lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 1 - lib/Target/AMDGPU/AMDGPUSubtarget.h | 17 +- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 18 +- lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp | 6 +- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 2 +- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 106 +- lib/Target/AMDGPU/CMakeLists.txt | 1 + .../Disassembler/AMDGPUDisassembler.cpp | 7 +- .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 2 +- lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 2 +- .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 2 +- .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 2 +- .../AMDGPUCodeObjectMetadataStreamer.cpp | 197 +- .../AMDGPUCodeObjectMetadataStreamer.h | 2 +- .../MCTargetDesc/AMDGPUELFObjectWriter.cpp | 2 +- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 32 +- .../MCTargetDesc/AMDGPUTargetStreamer.h | 12 - .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 2 +- lib/Target/AMDGPU/Processors.td | 81 +- .../AMDGPU/R600ControlFlowFinalizer.cpp | 4 +- lib/Target/AMDGPU/R600EmitClauseMarkers.cpp | 2 +- lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp | 2 +- lib/Target/AMDGPU/R600FrameLowering.cpp | 2 +- lib/Target/AMDGPU/R600ISelLowering.cpp | 2 +- lib/Target/AMDGPU/R600InstrInfo.cpp | 4 +- lib/Target/AMDGPU/R600MachineScheduler.cpp | 4 +- lib/Target/AMDGPU/R600Packetizer.cpp | 2 +- lib/Target/AMDGPU/SIDebuggerInsertNops.cpp | 2 +- lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 2 +- lib/Target/AMDGPU/SIFoldOperands.cpp | 3 +- lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +- lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +- lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +- lib/Target/AMDGPU/SILowerControlFlow.cpp | 2 +- lib/Target/AMDGPU/SILowerI1Copies.cpp | 2 +- lib/Target/AMDGPU/SIMachineFunctionInfo.h | 2 +- lib/Target/AMDGPU/SIMachineScheduler.cpp | 2 +- lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 103 +- lib/Target/AMDGPU/SIRegisterInfo.cpp | 62 +- lib/Target/AMDGPU/SIRegisterInfo.h | 4 +- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 43 +- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 7 - lib/Target/AMDGPU/VOP3Instructions.td | 9 +- lib/Target/ARM/ARMAsmPrinter.cpp | 4 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 4 +- lib/Target/ARM/ARMCallLowering.cpp | 16 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 2 +- lib/Target/ARM/ARMFastISel.cpp | 2 +- lib/Target/ARM/ARMFrameLowering.cpp | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 9 +- lib/Target/ARM/ARMInstrVFP.td | 9 +- lib/Target/ARM/ARMInstructionSelector.cpp | 68 +- lib/Target/ARM/ARMLegalizerInfo.cpp | 2 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 2 +- lib/Target/ARM/ARMMCInstLower.cpp | 2 +- lib/Target/ARM/ARMRegisterBankInfo.cpp | 33 +- lib/Target/ARM/ARMSubtarget.cpp | 4 +- lib/Target/ARM/ARMTargetMachine.cpp | 15 + lib/Target/ARM/ARMTargetObjectFile.cpp | 6 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 19 +- .../ARM/Disassembler/ARMDisassembler.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 66 +- .../ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 2 +- .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 2 +- .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 4 +- lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h | 3 + .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 12 +- .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 2 +- .../MCTargetDesc/ARMMachORelocationInfo.cpp | 2 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 4 +- .../MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 2 +- lib/Target/ARM/Thumb1FrameLowering.cpp | 6 +- lib/Target/ARM/Thumb1InstrInfo.cpp | 2 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 20 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 2 +- lib/Target/AVR/AVR.h | 2 +- lib/Target/AVR/AVRAsmPrinter.cpp | 2 +- lib/Target/AVR/AVRRegisterInfo.cpp | 2 - lib/Target/AVR/AVRSubtarget.cpp | 2 +- lib/Target/AVR/AVRSubtarget.h | 3 +- lib/Target/AVR/AVRTargetMachine.cpp | 4 +- lib/Target/AVR/AVRTargetObjectFile.cpp | 2 +- lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 6 +- .../AVR/Disassembler/AVRDisassembler.cpp | 4 +- .../AVR/MCTargetDesc/AVRELFStreamer.cpp | 2 +- lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp | 2 +- .../AVR/MCTargetDesc/AVRMCTargetDesc.cpp | 2 +- lib/Target/BPF/BPFAsmPrinter.cpp | 4 +- lib/Target/BPF/BPFInstrInfo.cpp | 2 +- lib/Target/BPF/BPFRegisterInfo.cpp | 6 +- lib/Target/BPF/BPFTargetMachine.cpp | 6 +- .../BPF/Disassembler/BPFDisassembler.cpp | 4 +- lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp | 2 +- .../BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 2 +- .../BPF/MCTargetDesc/BPFMCTargetDesc.cpp | 2 +- lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h | 2 +- .../Hexagon/AsmParser/HexagonAsmParser.cpp | 6 +- lib/Target/Hexagon/BitTracker.cpp | 2 +- .../Disassembler/HexagonDisassembler.cpp | 6 +- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 4 +- lib/Target/Hexagon/HexagonBitSimplify.cpp | 2 +- lib/Target/Hexagon/HexagonBitTracker.cpp | 2 +- lib/Target/Hexagon/HexagonBlockRanges.h | 2 +- lib/Target/Hexagon/HexagonCommonGEP.cpp | 16 +- .../Hexagon/HexagonConstPropagation.cpp | 2 +- lib/Target/Hexagon/HexagonCopyToCombine.cpp | 2 +- lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 2 +- lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 3 +- lib/Target/Hexagon/HexagonFrameLowering.cpp | 2 +- lib/Target/Hexagon/HexagonGenExtract.cpp | 2 +- lib/Target/Hexagon/HexagonGenInsert.cpp | 4 +- lib/Target/Hexagon/HexagonGenMux.cpp | 7 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 4 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 6 +- .../Hexagon/HexagonLoopIdiomRecognition.cpp | 4 +- .../Hexagon/HexagonMachineScheduler.cpp | 57 +- lib/Target/Hexagon/HexagonPatterns.td | 89 +- lib/Target/Hexagon/HexagonSplitDouble.cpp | 2 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 36 +- .../Hexagon/HexagonTargetObjectFile.cpp | 2 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 2 +- .../MCTargetDesc/HexagonAsmBackend.cpp | 2 +- .../MCTargetDesc/HexagonInstPrinter.cpp | 2 +- .../MCTargetDesc/HexagonMCCodeEmitter.cpp | 2 +- .../MCTargetDesc/HexagonMCELFStreamer.cpp | 2 +- .../MCTargetDesc/HexagonMCShuffler.cpp | 2 +- .../MCTargetDesc/HexagonMCTargetDesc.cpp | 6 +- lib/Target/Hexagon/RDFDeadCode.cpp | 2 +- lib/Target/Hexagon/RDFGraph.cpp | 2 +- lib/Target/Hexagon/RDFLiveness.cpp | 2 +- lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 2 +- lib/Target/Lanai/LanaiTargetObjectFile.cpp | 2 +- .../MCTargetDesc/LanaiELFObjectWriter.cpp | 2 +- .../Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp | 2 +- .../Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp | 2 +- lib/Target/MSP430/MSP430AsmPrinter.cpp | 2 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 10 +- .../Mips/Disassembler/MipsDisassembler.cpp | 4 +- .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 2 +- .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 2 +- .../Mips/MCTargetDesc/MipsELFStreamer.cpp | 2 +- .../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 2 +- lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 4 +- .../Mips/MCTargetDesc/MipsOptionRecord.cpp | 4 +- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 4 +- lib/Target/Mips/Mips.td | 3 + lib/Target/Mips/Mips16FrameLowering.cpp | 4 +- lib/Target/Mips/MipsAsmPrinter.cpp | 4 +- lib/Target/Mips/MipsCCState.cpp | 61 +- lib/Target/Mips/MipsCCState.h | 33 +- lib/Target/Mips/MipsCallingConv.td | 10 +- lib/Target/Mips/MipsConstantIslandPass.cpp | 2 +- lib/Target/Mips/MipsDSPInstrInfo.td | 5 +- lib/Target/Mips/MipsFastISel.cpp | 2 +- lib/Target/Mips/MipsISelLowering.cpp | 79 +- lib/Target/Mips/MipsISelLowering.h | 27 + lib/Target/Mips/MipsInstrFPU.td | 18 +- lib/Target/Mips/MipsInstrInfo.td | 6 + lib/Target/Mips/MipsMachineFunction.cpp | 8 +- lib/Target/Mips/MipsOptimizePICCall.cpp | 2 +- lib/Target/Mips/MipsOs16.cpp | 2 +- lib/Target/Mips/MipsRegisterInfo.cpp | 4 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 2 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 2 +- lib/Target/Mips/MipsSubtarget.cpp | 8 +- lib/Target/Mips/MipsSubtarget.h | 5 + lib/Target/Mips/MipsTargetMachine.cpp | 2 +- lib/Target/Mips/MipsTargetObjectFile.cpp | 2 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 4 +- lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 2 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 2 +- lib/Target/NVPTX/NVPTXLowerArgs.cpp | 2 +- lib/Target/NVPTX/NVPTXPeephole.cpp | 2 +- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- lib/Target/NVPTX/NVVMIntrRange.cpp | 2 +- .../PowerPC/InstPrinter/PPCInstPrinter.cpp | 2 +- .../PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 6 +- .../MCTargetDesc/PPCELFObjectWriter.cpp | 2 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 2 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 6 +- .../MCTargetDesc/PPCMachObjectWriter.cpp | 8 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 10 +- lib/Target/PowerPC/PPCBoolRetToInt.cpp | 42 +- lib/Target/PowerPC/PPCBranchSelector.cpp | 2 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 2 +- lib/Target/PowerPC/PPCEarlyReturn.cpp | 2 +- lib/Target/PowerPC/PPCFastISel.cpp | 4 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 65 +- lib/Target/PowerPC/PPCISelLowering.cpp | 10 +- lib/Target/PowerPC/PPCInstrVSX.td | 51 + lib/Target/PowerPC/PPCMCInstLower.cpp | 2 +- lib/Target/PowerPC/PPCMIPeephole.cpp | 2 +- lib/Target/PowerPC/PPCTLSDynamicCall.cpp | 2 +- lib/Target/PowerPC/PPCTOCRegDeps.cpp | 2 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 2 +- lib/Target/PowerPC/PPCVSXCopy.cpp | 2 +- lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 2 +- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 2 +- .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 2 +- .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 2 +- .../RISCV/MCTargetDesc/RISCVMCTargetDesc.h | 2 +- lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 4 +- .../Sparc/Disassembler/SparcDisassembler.cpp | 4 +- .../Sparc/MCTargetDesc/SparcAsmBackend.cpp | 2 +- .../Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 2 +- lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 1 - lib/Target/Sparc/SparcAsmPrinter.cpp | 2 +- lib/Target/Sparc/SparcMCInstLower.cpp | 2 +- lib/Target/Sparc/SparcTargetMachine.cpp | 4 +- lib/Target/Sparc/SparcTargetObjectFile.cpp | 2 +- .../SystemZ/AsmParser/SystemZAsmParser.cpp | 2 +- .../MCTargetDesc/SystemZMCAsmBackend.cpp | 2 +- .../MCTargetDesc/SystemZMCObjectWriter.cpp | 2 +- lib/Target/SystemZ/SystemZHazardRecognizer.h | 2 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 25 +- lib/Target/SystemZ/SystemZInstrInfo.cpp | 2 +- lib/Target/SystemZ/SystemZLDCleanup.cpp | 2 +- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 2 +- lib/Target/SystemZ/SystemZShortenInst.cpp | 2 +- lib/Target/SystemZ/SystemZSubtarget.h | 2 +- lib/Target/SystemZ/SystemZTDC.cpp | 6 +- lib/Target/SystemZ/SystemZTargetMachine.cpp | 4 +- lib/Target/Target.cpp | 4 +- lib/Target/TargetLoweringObjectFile.cpp | 16 +- lib/Target/TargetMachineC.cpp | 4 +- .../Disassembler/WebAssemblyDisassembler.cpp | 2 +- .../InstPrinter/WebAssemblyInstPrinter.h | 2 +- .../MCTargetDesc/WebAssemblyAsmBackend.cpp | 2 +- .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 2 +- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 2 +- .../MCTargetDesc/WebAssemblyTargetStreamer.h | 2 +- .../WebAssemblyWasmObjectWriter.cpp | 11 +- lib/Target/WebAssembly/WebAssemblyCFGSort.cpp | 2 +- .../WebAssembly/WebAssemblyCFGStackify.cpp | 2 +- .../WebAssemblyCallIndirectFixup.cpp | 2 +- .../WebAssembly/WebAssemblyFastISel.cpp | 2 +- .../WebAssemblyFixIrreducibleControlFlow.cpp | 2 +- .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 2 +- .../WebAssembly/WebAssemblyLowerBrUnless.cpp | 2 +- .../WebAssemblyPrepareForLiveIntervals.cpp | 2 +- .../WebAssembly/WebAssemblyRegNumbering.cpp | 2 +- .../WebAssembly/WebAssemblyRegStackify.cpp | 2 +- .../WebAssemblyReplacePhysRegs.cpp | 2 +- .../WebAssemblySetP2AlignOperands.cpp | 2 +- .../WebAssembly/WebAssemblyStoreResults.cpp | 2 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 4 +- .../X86/AsmParser/X86AsmInstrumentation.cpp | 4 +- lib/Target/X86/AsmParser/X86Operand.h | 2 +- lib/Target/X86/CMakeLists.txt | 1 - .../X86/Disassembler/X86Disassembler.cpp | 2 +- .../Disassembler/X86DisassemblerDecoder.cpp | 8 +- .../X86/InstPrinter/X86ATTInstPrinter.cpp | 2 +- .../X86/InstPrinter/X86InstComments.cpp | 2 +- .../X86/InstPrinter/X86IntelInstPrinter.cpp | 2 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 4 +- .../X86/MCTargetDesc/X86ELFObjectWriter.cpp | 2 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 2 +- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 4 +- .../MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2 +- lib/Target/X86/X86AsmPrinter.cpp | 2 +- lib/Target/X86/X86FastISel.cpp | 6 + lib/Target/X86/X86FrameLowering.cpp | 2 +- lib/Target/X86/X86ISelLowering.cpp | 191 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 41 +- lib/Target/X86/X86InstrInfo.cpp | 3400 +++++++++++++++- lib/Target/X86/X86InstrSSE.td | 10 +- lib/Target/X86/X86MCInstLower.cpp | 16 +- lib/Target/X86/X86OptimizeLEAs.cpp | 2 +- lib/Target/X86/X86SchedHaswell.td | 33 + lib/Target/X86/X86SchedSandyBridge.td | 25 + lib/Target/X86/X86Schedule.td | 4 + lib/Target/X86/X86ScheduleBtVer2.td | 32 + lib/Target/X86/X86ScheduleSLM.td | 27 + lib/Target/X86/X86SelectionDAGInfo.cpp | 4 +- lib/Target/X86/X86Subtarget.cpp | 2 +- lib/Target/X86/X86TargetMachine.cpp | 2 +- lib/Target/X86/X86TargetObjectFile.cpp | 4 +- lib/Target/X86/X86TargetTransformInfo.cpp | 13 + lib/Target/X86/X86TargetTransformInfo.h | 4 + lib/Target/X86/X86WinEHState.cpp | 2 +- .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 4 +- lib/Target/XCore/XCoreAsmPrinter.cpp | 2 +- lib/Target/XCore/XCoreTargetMachine.cpp | 2 +- lib/Target/XCore/XCoreTargetMachine.h | 2 +- lib/Target/XCore/XCoreTargetObjectFile.cpp | 2 +- lib/ToolDrivers/llvm-lib/LLVMBuild.txt | 2 +- lib/ToolDrivers/llvm-lib/LibDriver.cpp | 12 +- lib/Transforms/Coroutines/CoroSplit.cpp | 2 +- lib/Transforms/IPO/ElimAvailExtern.cpp | 2 +- lib/Transforms/IPO/ExtractGV.cpp | 14 +- lib/Transforms/IPO/FunctionAttrs.cpp | 3 +- lib/Transforms/IPO/GlobalSplit.cpp | 2 +- lib/Transforms/IPO/IPConstantPropagation.cpp | 2 +- lib/Transforms/IPO/IPO.cpp | 4 +- lib/Transforms/IPO/InferFunctionAttrs.cpp | 2 +- lib/Transforms/IPO/Inliner.cpp | 8 +- lib/Transforms/IPO/LoopExtractor.cpp | 2 +- lib/Transforms/IPO/LowerTypeTests.cpp | 56 +- lib/Transforms/IPO/PruneEH.cpp | 4 +- lib/Transforms/IPO/SampleProfile.cpp | 7 + lib/Transforms/IPO/StripSymbols.cpp | 2 +- lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 12 +- lib/Transforms/IPO/WholeProgramDevirt.cpp | 2 +- .../InstCombine/InstCombineAddSub.cpp | 23 +- .../InstCombine/InstCombineAndOrXor.cpp | 27 +- .../InstCombine/InstCombineCalls.cpp | 16 +- .../InstCombine/InstCombineCasts.cpp | 8 +- .../InstCombine/InstCombineCompares.cpp | 70 +- .../InstCombine/InstCombineInternal.h | 2 +- .../InstCombine/InstCombineMulDivRem.cpp | 21 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 4 +- .../InstCombine/InstCombineSelect.cpp | 7 +- .../InstCombine/InstCombineShifts.cpp | 30 +- .../InstCombineSimplifyDemanded.cpp | 8 +- .../InstCombine/InstCombineVectorOps.cpp | 9 +- .../InstCombine/InstructionCombining.cpp | 45 +- .../Instrumentation/BoundsChecking.cpp | 2 +- .../Instrumentation/DataFlowSanitizer.cpp | 4 +- .../Instrumentation/EfficiencySanitizer.cpp | 2 +- .../Instrumentation/InstrProfiling.cpp | 2 +- .../Instrumentation/ThreadSanitizer.cpp | 2 +- lib/Transforms/ObjCARC/BlotMapVector.h | 2 +- lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 2 +- lib/Transforms/ObjCARC/ObjCARCContract.cpp | 2 +- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 2 +- lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 2 +- .../ObjCARC/ProvenanceAnalysisEvaluator.cpp | 4 +- lib/Transforms/ObjCARC/PtrState.h | 2 +- .../Scalar/AlignmentFromAssumptions.cpp | 4 +- lib/Transforms/Scalar/ConstantProp.cpp | 6 +- lib/Transforms/Scalar/DCE.cpp | 2 +- lib/Transforms/Scalar/FlattenCFGPass.cpp | 2 +- lib/Transforms/Scalar/GVNHoist.cpp | 2 +- lib/Transforms/Scalar/GVNSink.cpp | 6 +- lib/Transforms/Scalar/GuardWidening.cpp | 2 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 7 +- .../Scalar/InductiveRangeCheckElimination.cpp | 37 +- lib/Transforms/Scalar/InferAddressSpaces.cpp | 9 +- lib/Transforms/Scalar/JumpThreading.cpp | 10 +- lib/Transforms/Scalar/LoadCombine.cpp | 2 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 77 +- lib/Transforms/Scalar/LoopPredication.cpp | 2 +- lib/Transforms/Scalar/LoopRerollPass.cpp | 4 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 167 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 16 +- .../Scalar/LowerExpectIntrinsic.cpp | 4 +- lib/Transforms/Scalar/LowerGuardIntrinsic.cpp | 2 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 8 +- lib/Transforms/Scalar/NewGVN.cpp | 80 +- lib/Transforms/Scalar/Reg2Mem.cpp | 2 +- .../Scalar/RewriteStatepointsForGC.cpp | 20 +- lib/Transforms/Scalar/SCCP.cpp | 4 +- lib/Transforms/Scalar/SROA.cpp | 19 +- lib/Transforms/Scalar/Scalar.cpp | 6 +- lib/Transforms/Scalar/Scalarizer.cpp | 2 +- .../Scalar/SeparateConstOffsetFromGEP.cpp | 10 +- lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 4 +- lib/Transforms/Scalar/Sink.cpp | 2 +- lib/Transforms/Scalar/StructurizeCFG.cpp | 2 +- .../Scalar/TailRecursionElimination.cpp | 4 +- lib/Transforms/Utils/CMakeLists.txt | 1 + lib/Transforms/Utils/CloneFunction.cpp | 2 +- lib/Transforms/Utils/CloneModule.cpp | 4 +- lib/Transforms/Utils/DemoteRegToStack.cpp | 2 +- lib/Transforms/Utils/Evaluator.cpp | 4 +- lib/Transforms/Utils/FlattenCFG.cpp | 2 +- lib/Transforms/Utils/FunctionComparator.cpp | 2 +- lib/Transforms/Utils/FunctionImportUtils.cpp | 2 +- lib/Transforms/Utils/GlobalStatus.cpp | 2 +- lib/Transforms/Utils/InlineFunction.cpp | 6 +- lib/Transforms/Utils/InstructionNamer.cpp | 2 +- lib/Transforms/Utils/Local.cpp | 2 +- lib/Transforms/Utils/LoopSimplify.cpp | 4 +- lib/Transforms/Utils/LoopUnroll.cpp | 2 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 2 +- lib/Transforms/Utils/LoopUtils.cpp | 3 +- lib/Transforms/Utils/LowerMemIntrinsics.cpp | 4 +- lib/Transforms/Utils/LowerSwitch.cpp | 2 +- lib/Transforms/Utils/MetaRenamer.cpp | 2 +- lib/Transforms/Utils/OrderedInstructions.cpp | 33 + lib/Transforms/Utils/SSAUpdater.cpp | 4 +- lib/Transforms/Utils/SanitizerStats.cpp | 2 +- lib/Transforms/Utils/SimplifyCFG.cpp | 5 +- lib/Transforms/Utils/SimplifyInstructions.cpp | 2 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 2 +- lib/Transforms/Utils/StripGCRelocates.cpp | 2 +- .../Utils/StripNonLineTableDebugInfo.cpp | 2 +- lib/Transforms/Utils/SymbolRewriter.cpp | 2 +- lib/Transforms/Utils/Utils.cpp | 2 +- lib/Transforms/Vectorize/BBVectorize.cpp | 2 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 9 +- lib/Transforms/Vectorize/Vectorize.cpp | 2 +- lib/XRay/InstrumentationMap.cpp | 2 +- .../BranchProbabilityInfo/libfunc_call.ll | 264 ++ .../gep-constanfolding-error.ll | 2 +- .../lvi-after-jumpthreading.ll | 70 +- test/Bindings/OCaml/core.ml | 11 + test/Bitcode/ptest-old.ll | 2 +- test/BugPoint/unsymbolized.ll | 21 + test/CMakeLists.txt | 2 +- .../arm64-irtranslator-stackprotect.ll | 2 +- .../AArch64/GlobalISel/arm64-irtranslator.ll | 23 +- .../GlobalISel/arm64-regbankselect.mir | 158 +- .../AArch64/GlobalISel/call-translator-ios.ll | 4 +- .../AArch64/GlobalISel/call-translator.ll | 10 +- .../CodeGen/AArch64/GlobalISel/debug-insts.ll | 4 +- .../GlobalISel/localizer-in-O0-pipeline.mir | 16 +- test/CodeGen/AArch64/GlobalISel/localizer.mir | 80 +- .../GlobalISel/regbankselect-dbg-value.mir | 2 +- .../GlobalISel/regbankselect-default.mir | 164 +- .../AArch64/GlobalISel/select-binop.mir | 200 +- .../AArch64/GlobalISel/select-bitcast.mir | 32 +- .../AArch64/GlobalISel/select-fp-casts.mir | 72 +- .../AArch64/GlobalISel/select-int-ext.mir | 46 +- .../GlobalISel/select-int-ptr-casts.mir | 24 +- .../AArch64/GlobalISel/select-load.mir | 100 +- .../AArch64/GlobalISel/select-muladd.mir | 14 +- .../AArch64/GlobalISel/select-store.mir | 84 +- .../AArch64/GlobalISel/select-trunc.mir | 12 +- .../CodeGen/AArch64/GlobalISel/select-xor.mir | 30 +- test/CodeGen/AArch64/GlobalISel/select.mir | 52 +- .../GlobalISel/varargs-ios-translator.ll | 2 +- .../arm64-fast-isel-conversion-fallback.ll | 131 + test/CodeGen/AArch64/spill-undef.mir | 67 + .../AMDGPU/GlobalISel/legalize-icmp.mir | 24 + .../AMDGPU/GlobalISel/legalize-select.mir | 28 + .../AMDGPU/GlobalISel/regbankselect.mir | 14 +- test/CodeGen/AMDGPU/add.v2i16.ll | 8 +- test/CodeGen/AMDGPU/ashr.v2i16.ll | 2 +- test/CodeGen/AMDGPU/branch-relax-spill.ll | 420 +- .../AMDGPU/clamp-omod-special-case.mir | 20 + test/CodeGen/AMDGPU/exceed-max-sgprs.ll | 142 +- test/CodeGen/AMDGPU/fabs.f16.ll | 8 +- test/CodeGen/AMDGPU/fadd.f16.ll | 6 +- test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 6 +- test/CodeGen/AMDGPU/flat-scratch-reg.ll | 14 +- test/CodeGen/AMDGPU/fmul.f16.ll | 6 +- test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 8 +- test/CodeGen/AMDGPU/fneg.f16.ll | 2 +- test/CodeGen/AMDGPU/fptosi.f16.ll | 2 +- test/CodeGen/AMDGPU/fptoui.f16.ll | 2 +- test/CodeGen/AMDGPU/fsub.f16.ll | 4 +- test/CodeGen/AMDGPU/hsa-note-no-func.ll | 13 + .../AMDGPU/illegal-sgpr-to-vgpr-copy.ll | 20 +- test/CodeGen/AMDGPU/immv216.ll | 38 +- test/CodeGen/AMDGPU/indirect-addressing-si.ll | 2 +- test/CodeGen/AMDGPU/inline-asm.ll | 12 +- .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 8 +- test/CodeGen/AMDGPU/limit-coalesce.mir | 14 +- test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll | 23 + .../AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll | 22 +- .../AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll | 57 +- .../AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll | 22 +- test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll | 2 +- test/CodeGen/AMDGPU/llvm.maxnum.f16.ll | 6 +- test/CodeGen/AMDGPU/llvm.minnum.f16.ll | 6 +- .../AMDGPU/partial-sgpr-to-vgpr-spills.ll | 10 +- .../AMDGPU/promote-alloca-array-aggregate.ll | 131 + ...dependent-subregs-invalid-mac-operands.mir | 69 + test/CodeGen/AMDGPU/scratch-simple.ll | 6 +- test/CodeGen/AMDGPU/sdwa-peephole.ll | 24 +- test/CodeGen/AMDGPU/shl.v2i16.ll | 2 +- test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll | 88 +- test/CodeGen/AMDGPU/skip-if-dead.ll | 12 +- test/CodeGen/AMDGPU/sminmax.v2i16.ll | 8 +- test/CodeGen/AMDGPU/spill-scavenge-offset.ll | 14 +- test/CodeGen/AMDGPU/sub.v2i16.ll | 12 +- .../AMDGPU/undefined-subreg-liverange.ll | 6 +- test/CodeGen/AMDGPU/v_mac_f16.ll | 31 +- .../arm-instruction-select-combos.mir | 149 + .../ARM/GlobalISel/arm-instruction-select.mir | 115 +- .../ARM/GlobalISel/arm-irtranslator.ll | 90 +- test/CodeGen/ARM/GlobalISel/arm-isel.ll | 84 +- .../ARM/GlobalISel/arm-legalize-fp.mir | 36 +- test/CodeGen/ARM/GlobalISel/arm-legalizer.mir | 242 +- .../ARM/GlobalISel/arm-regbankselect.mir | 281 +- test/CodeGen/ARM/clang-section.ll | 140 + test/CodeGen/ARM/cortex-a57-misched-vfma.ll | 91 +- test/CodeGen/ARM/invalidated-save-point.ll | 4 +- test/CodeGen/Generic/llc-start-stop.ll | 4 +- test/CodeGen/Hexagon/common-gep-inbounds.ll | 20 + test/CodeGen/Hexagon/mux-undef.ll | 27 + .../generic-virtual-registers-error.mir | 1 - ...c-virtual-registers-with-regbank-error.mir | 1 - .../MIR/AArch64/register-operand-bank.mir | 4 +- .../MIR/AArch64/stack-object-local-offset.mir | 4 +- test/CodeGen/MIR/Generic/frame-info.mir | 5 + .../function-missing-machine-function.mir | 13 - test/CodeGen/MIR/X86/callee-saved-info.mir | 4 +- test/CodeGen/MIR/X86/empty0.mir | 6 + test/CodeGen/MIR/X86/empty1.mir | 8 + test/CodeGen/MIR/X86/empty2.mir | 8 + test/CodeGen/MIR/X86/fixed-stack-objects.mir | 2 +- test/CodeGen/MIR/X86/generic-instr-type.mir | 10 +- test/CodeGen/MIR/X86/inline-asm.mir | 12 + .../MIR/X86/register-operand-class.mir | 12 +- test/CodeGen/MIR/X86/roundtrip.mir | 20 + .../X86/simple-register-allocation-hints.mir | 2 +- .../X86/spill-slot-fixed-stack-objects.mir | 2 +- .../MIR/X86/stack-object-debug-info.mir | 5 +- test/CodeGen/MIR/X86/stack-objects.mir | 9 +- .../MIR/X86/variable-sized-stack-objects.mir | 8 +- test/CodeGen/MIR/X86/virtual-registers.mir | 12 +- test/CodeGen/Mips/biggot.ll | 3 + test/CodeGen/Mips/cconv/vector.ll | 1657 ++++++++ test/CodeGen/Mips/ctlz-v.ll | 12 +- test/CodeGen/Mips/cttz-v.ll | 19 +- test/CodeGen/Mips/dsp-r1.ll | 12 +- test/CodeGen/Mips/fmadd1.ll | 435 +- test/CodeGen/Mips/llvm-ir/mul.ll | 2 +- test/CodeGen/Mips/llvm-ir/sdiv.ll | 12 +- test/CodeGen/Mips/llvm-ir/srem.ll | 11 +- test/CodeGen/Mips/llvm-ir/udiv.ll | 11 +- test/CodeGen/Mips/llvm-ir/urem.ll | 6 +- test/CodeGen/Mips/micromips-gp-rc.ll | 2 +- test/CodeGen/Mips/mips64fpldst.ll | 12 +- test/CodeGen/Mips/pbqp-reserved-physreg.ll | 35 + test/CodeGen/Mips/return-vector.ll | 33 +- test/CodeGen/Mips/tailcall/tailcall.ll | 4 +- test/CodeGen/PowerPC/BoolRetToIntTest-2.ll | 19 + test/CodeGen/PowerPC/BoolRetToIntTest.ll | 28 +- test/CodeGen/PowerPC/crbits.ll | 46 +- test/CodeGen/PowerPC/logic-ops-on-compares.ll | 73 +- .../memCmpUsedInZeroEqualityComparison.ll | 281 +- test/CodeGen/PowerPC/ppc-crbits-onoff.ll | 18 +- test/CodeGen/PowerPC/setcc-logic.ll | 16 +- test/CodeGen/PowerPC/testComparesinesc.ll | 121 + test/CodeGen/PowerPC/testComparesinesi.ll | 121 + test/CodeGen/PowerPC/testComparesinesll.ll | 125 + test/CodeGen/PowerPC/testComparesiness.ll | 121 + test/CodeGen/PowerPC/testComparesineuc.ll | 136 + test/CodeGen/PowerPC/testComparesineui.ll | 121 + test/CodeGen/PowerPC/testComparesineull.ll | 125 + test/CodeGen/PowerPC/testComparesineus.ll | 137 + test/CodeGen/PowerPC/testComparesllnesll.ll | 125 + test/CodeGen/PowerPC/testComparesllneull.ll | 125 + test/CodeGen/PowerPC/vec_int_ext.ll | 90 + test/CodeGen/X86/2006-05-11-InstrSched.ll | 2 +- .../X86/GlobalISel/irtranslator-call.ll | 30 - .../GlobalISel/irtranslator-callingconv.ll | 122 +- .../irtranslator-callingconv_64bit.ll | 25 - .../X86/GlobalISel/legalize-mul-scalar.mir | 18 +- .../X86/GlobalISel/legalize-mul-v128.mir | 18 +- .../X86/GlobalISel/legalize-mul-v256.mir | 18 +- .../X86/GlobalISel/legalize-mul-v512.mir | 18 +- .../X86/GlobalISel/regbankselect-AVX2.mir | 20 +- .../X86/GlobalISel/regbankselect-AVX512.mir | 20 +- .../X86/GlobalISel/regbankselect-X32.mir | 10 +- .../X86/GlobalISel/regbankselect-X86_64.mir | 170 +- .../X86/GlobalISel/select-add-v128.mir | 72 +- .../X86/GlobalISel/select-add-v256.mir | 72 +- .../X86/GlobalISel/select-add-v512.mir | 24 +- .../CodeGen/X86/GlobalISel/select-add-x32.mir | 20 +- test/CodeGen/X86/GlobalISel/select-add.mir | 72 +- test/CodeGen/X86/GlobalISel/select-cmp.mir | 130 +- .../X86/GlobalISel/select-constant.mir | 12 +- .../X86/GlobalISel/select-ext-x86-64.mir | 16 +- test/CodeGen/X86/GlobalISel/select-ext.mir | 22 +- test/CodeGen/X86/GlobalISel/select-gep.mir | 6 +- test/CodeGen/X86/GlobalISel/select-inc.mir | 8 +- .../X86/GlobalISel/select-leaf-constant.mir | 8 +- .../GlobalISel/select-memop-scalar-x32.mir | 56 +- .../X86/GlobalISel/select-memop-scalar.mir | 76 +- .../X86/GlobalISel/select-memop-v128.mir | 24 +- .../X86/GlobalISel/select-memop-v256.mir | 32 +- .../X86/GlobalISel/select-memop-v512.mir | 16 +- .../X86/GlobalISel/select-mul-scalar.mir | 18 +- .../CodeGen/X86/GlobalISel/select-mul-vec.mir | 90 +- .../X86/GlobalISel/select-sub-v128.mir | 72 +- .../X86/GlobalISel/select-sub-v256.mir | 72 +- .../X86/GlobalISel/select-sub-v512.mir | 24 +- test/CodeGen/X86/GlobalISel/select-sub.mir | 60 +- test/CodeGen/X86/GlobalISel/select-trunc.mir | 24 +- test/CodeGen/X86/O0-pipeline.ll | 2 +- test/CodeGen/X86/atom-fixup-lea3.ll | 11 +- test/CodeGen/X86/avx-schedule.ll | 32 +- test/CodeGen/X86/avx-splat.ll | 36 +- test/CodeGen/X86/avx512-cvt.ll | 4 - test/CodeGen/X86/build-vector-128.ll | 96 +- test/CodeGen/X86/buildvec-insertvec.ll | 76 +- .../X86/clear_upper_vector_element_bits.ll | 201 +- test/CodeGen/X86/fast-isel-nontemporal.ll | 108 +- test/CodeGen/X86/full-lsr.ll | 10 +- test/CodeGen/X86/haddsub-2.ll | 362 +- test/CodeGen/X86/haddsub-undef.ll | 5 +- test/CodeGen/X86/hoist-spill.ll | 2 - test/CodeGen/X86/loop-strength-reduce4.ll | 15 +- test/CodeGen/X86/madd.ll | 78 +- test/CodeGen/X86/masked-iv-safe.ll | 16 +- test/CodeGen/X86/memcmp.ll | 330 +- .../X86/merge-consecutive-loads-128.ll | 36 +- test/CodeGen/X86/mul-constant-i16.ll | 139 +- test/CodeGen/X86/mul-constant-i32.ll | 1578 +++++++- test/CodeGen/X86/mul-constant-i64.ll | 1605 +++++++- test/CodeGen/X86/mul-constant-result.ll | 1291 ++++++ test/CodeGen/X86/nontemporal-loads.ll | 779 ++-- test/CodeGen/X86/pr32659.ll | 83 + test/CodeGen/X86/select.ll | 14 +- test/CodeGen/X86/selectiondag-dominator.ll | 30 + test/CodeGen/X86/sse-intrinsics-fast-isel.ll | 74 +- test/CodeGen/X86/sse1.ll | 80 +- test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 167 +- test/CodeGen/X86/sse3-avx-addsub-2.ll | 14 +- .../CodeGen/X86/sse42-intrinsics-fast-isel.ll | 6 +- test/CodeGen/X86/stack-folding-fp-avx1.ll | 21 +- test/CodeGen/X86/stack-folding-int-sse42.ll | 17 +- test/CodeGen/X86/trunc-to-bool.ll | 70 +- test/CodeGen/X86/vec_fp_to_int.ll | 18 +- test/CodeGen/X86/vec_int_to_fp.ll | 306 +- test/CodeGen/X86/vec_set.ll | 24 +- test/CodeGen/X86/vector-compare-results.ll | 538 ++- test/CodeGen/X86/vector-rem.ll | 34 +- test/CodeGen/X86/vector-sext.ll | 876 ++-- test/CodeGen/X86/vector-shuffle-v48.ll | 49 + .../X86/vector-shuffle-variable-128.ll | 314 +- test/CodeGen/X86/vector-sqrt.ll | 18 +- test/CodeGen/X86/vector-unsigned-cmp.ll | 134 +- ...rs-cleared-in-machine-functions-liveins.ll | 4 +- test/CodeGen/X86/vshift-1.ll | 9 +- test/CodeGen/X86/vshift-2.ll | 9 +- test/CodeGen/X86/x86-interleaved-access.ll | 93 + .../Inputs/dwarfdump-str-offsets-dwp.s | 277 ++ .../Inputs/dwarfdump-str-offsets-invalid-1.s | 34 + .../dwarfdump-str-offsets-invalid-1.x86_64.o | Bin 0 -> 824 bytes .../Inputs/dwarfdump-str-offsets-invalid-2.s | 36 + .../dwarfdump-str-offsets-invalid-2.x86_64.o | Bin 0 -> 832 bytes .../Inputs/dwarfdump-str-offsets-invalid-3.s | 88 + .../dwarfdump-str-offsets-invalid-3.x86_64.o | Bin 0 -> 2296 bytes .../Inputs/dwarfdump-str-offsets-invalid-4.s | 50 + .../dwarfdump-str-offsets-invalid-4.x86_64.o | Bin 0 -> 1264 bytes .../Inputs/dwarfdump-str-offsets-invalid-5.s | 10 + .../dwarfdump-str-offsets-invalid-5.x86_64.o | Bin 0 -> 464 bytes test/DebugInfo/Inputs/dwarfdump-str-offsets.s | 500 +++ .../Inputs/dwarfdump-str-offsets.x86_64.o | Bin 0 -> 4000 bytes test/DebugInfo/PDB/DIA/pdbdump-flags.test | 8 +- .../PDB/DIA/pdbdump-linenumbers.test | 4 +- .../PDB/DIA/pdbdump-symbol-format.test | 10 +- .../PDB/Inputs/debug-subsections.yaml | 91 + .../PDB/Inputs/simple-line-info.yaml | 44 - .../PDB/Native/pdb-native-compilands.test | 4 +- .../PDB/Native/pdb-native-summary.test | 2 +- .../PDB/pdb-longname-truncation.test | 2 +- test/DebugInfo/PDB/pdb-minimal-construct.test | 22 +- test/DebugInfo/PDB/pdb-yaml-symbols.test | 2 +- test/DebugInfo/PDB/pdb-yaml-types.test | 6 +- .../PDB/pdbdump-debug-subsections.test | 210 + test/DebugInfo/PDB/pdbdump-headers.test | 70 +- .../PDB/pdbdump-merge-ids-and-types.test | 18 +- test/DebugInfo/PDB/pdbdump-mergeids.test | 12 +- test/DebugInfo/PDB/pdbdump-mergetypes.test | 10 +- test/DebugInfo/PDB/pdbdump-raw-blocks.test | 10 +- test/DebugInfo/PDB/pdbdump-raw-stream.test | 4 +- test/DebugInfo/PDB/pdbdump-readwrite.test | 10 +- test/DebugInfo/PDB/pdbdump-source-names.test | 8 +- test/DebugInfo/PDB/pdbdump-write.test | 14 +- .../PDB/pdbdump-yaml-lineinfo-write.test | 71 - test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test | 60 - test/DebugInfo/PDB/pdbdump-yaml-types.test | 2 +- test/DebugInfo/PDB/pdbdump-yaml.test | 4 +- .../dwarfdump-str-offsets-invalid.test | 24 + test/DebugInfo/dwarfdump-str-offsets.test | 76 + test/FileCheck/check-dag.txt | 9 + test/Instrumentation/MemorySanitizer/csr.ll | 2 +- .../MemorySanitizer/msan_x86intrinsics.ll | 2 +- .../MemorySanitizer/vector_arith.ll | 2 +- .../MemorySanitizer/vector_cmp.ll | 2 +- .../MemorySanitizer/vector_cvt.ll | 2 +- .../MemorySanitizer/vector_pack.ll | 2 +- .../MemorySanitizer/vector_shift.ll | 2 +- test/LTO/ARM/Inputs/thumb.ll | 15 + test/LTO/ARM/link-arm-and-thumb.ll | 32 + test/LTO/Resolution/X86/linker-redef.ll | 16 + test/Linker/Inputs/thumb.ll | 16 + test/Linker/link-arm-and-thumb.ll | 23 + test/MC/AMDGPU/sopp-err.s | 8 +- test/MC/AMDGPU/sym_option.s | 4 +- test/MC/ARM/arm-thumb-tail-call.ll | 25 + test/MC/ARM/big-endian-thumb2-fixup.s | 6 + .../ARM/t2-modified-immediate-fixup-error1.s | 13 + .../ARM/t2-modified-immediate-fixup-error2.s | 12 + test/MC/ARM/t2-modified-immediate-fixup.s | 45 + test/MC/ARM/thumb2-diagnostics.s | 2 - test/MC/AsmParser/empty-comment.s | 4 + .../Disassembler/Mips/micromips-dsp/valid.txt | 2 +- test/MC/ELF/ARM/clang-section.s | 399 ++ test/MC/MachO/alias.s | 12 + test/MC/MachO/variable-exprs.s | 8 +- test/MC/Mips/dsp/invalid.s | 4 +- test/MC/Mips/micromips-dsp/invalid.s | 2 + test/MC/Mips/micromips-dsp/valid.s | 2 +- test/MC/WebAssembly/reloc-code.ll | 20 +- test/Object/AMDGPU/elf-definitions.yaml | 21 +- test/Object/objc-imageinfo-coff.ll | 15 + test/Object/objc-imageinfo-elf.ll | 15 + test/Object/objc-imageinfo-macho.ll | 15 + test/Transforms/CodeGenPrepare/X86/memcmp.ll | 337 ++ test/Transforms/ConstProp/sse.ll | 2 +- test/Transforms/DCE/calls-errno.ll | 4 + test/Transforms/GVNSink/sink-common-code.ll | 57 +- test/Transforms/IRCE/correct-loop-info.ll | 182 + .../IndVarSimplify/lftr_disabled.ll | 28 + .../NVPTX/clone_constexpr.ll | 36 + test/Transforms/Inline/basictest.ll | 24 + .../InstCombine/constant-fold-libfunc.ll | 20 + .../InstCombine/insert-extract-shuffle.ll | 23 + test/Transforms/InstCombine/intrinsics.ll | 154 +- test/Transforms/InstCombine/lshr.ll | 43 +- test/Transforms/InstSimplify/call.ll | 33 + test/Transforms/InstSimplify/compare.ll | 2 +- .../InstSimplify/simplify-nested-bitcast.ll | 54 + test/Transforms/InstSimplify/vector_gep.ll | 2 +- .../X86/interleaved-accesses-64bits-avx.ll | 61 +- .../LoopIdiom/X86/unordered-atomic-memcpy.ll | 452 +++ .../unordered-atomic-memcpy-noarch.ll | 28 + .../LoopStrengthReduce/X86/canonical.ll | 2 +- .../LoopStrengthReduce/X86/ivchain-X86.ll | 4 +- .../X86/lsr-expand-quadratic.ll | 14 +- .../LoopStrengthReduce/X86/lsr-insns-1.ll | 4 +- .../LoopStrengthReduce/X86/lsr-insns-2.ll | 4 +- .../LoopStrengthReduce/X86/nested-loop.ll | 22 +- .../AArch64/loop-vectorization-factors.ll | 46 +- .../LowerExpectIntrinsic/PR33346.ll | 22 + test/Transforms/LowerTypeTests/simple.ll | 16 +- test/Transforms/LowerTypeTests/simplify.ll | 37 + test/Transforms/NewGVN/completeness.ll | 2 +- test/Transforms/NewGVN/loadforward.ll | 4 +- test/Transforms/NewGVN/pr32403.ll | 3 +- test/Transforms/NewGVN/pr32897.ll | 1 - test/Transforms/NewGVN/pr33187.ll | 148 + test/Transforms/SLPVectorizer/X86/arith-fp.ll | 48 +- .../X86/reverse_extract_elements.ll | 138 + test/Transforms/SROA/address-spaces.ll | 28 + .../SampleProfile/Inputs/indirect-call.prof | 3 + .../Transforms/SampleProfile/indirect-call.ll | 13 + test/Transforms/Sink/badloadsink.ll | 18 + test/Transforms/ThinLTOBitcodeWriter/split.ll | 4 + .../Util/PredicateInfo/condprop2.ll | 2 +- .../Util/PredicateInfo/testandor2.ll | 2 +- test/lit.cfg | 3 + test/lit.site.cfg.in | 1 + .../llvm-cvtres/Inputs/test_resource.obj.coff | Bin 0 -> 3468 bytes test/tools/llvm-cvtres/object.test | 229 ++ .../llvm-cvtres/{resource.test => parse.test} | 2 +- test/tools/llvm-dwarfdump/X86/brief.s | 131 + test/tools/llvm-dwarfdump/X86/lit.local.cfg | 2 + test/tools/llvm-pdbdump/class-layout.test | 2 +- .../complex-padding-graphical.test | 2 +- test/tools/llvm-pdbdump/enum-layout.test | 2 +- test/tools/llvm-pdbdump/load-address.test | 4 +- test/tools/llvm-pdbdump/raw-stream-data.test | 6 +- test/tools/llvm-pdbdump/regex-filter.test | 20 +- .../simple-padding-graphical.test | 2 +- test/tools/llvm-pdbdump/symbol-filters.test | 16 +- .../Inputs/trivial.elf-amdhsa-kaveri | Bin 13208 -> 0 bytes .../Inputs/trivial.obj.elf-amdhsa-gfx803 | Bin 0 -> 2208 bytes .../llvm-readobj/amdgpu-elf-definitions.test | 11 + test/tools/llvm-readobj/amdgpu-elf-defs.test | 28 - test/tools/llvm-readobj/elf-sec-flags.test | 29 +- tools/LLVMBuild.txt | 2 +- tools/bugpoint/OptimizerDriver.cpp | 5 +- tools/dsymutil/DwarfLinker.cpp | 9 +- tools/llc/llc.cpp | 95 +- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 3 + tools/llvm-cvtres/LLVMBuild.txt | 2 +- tools/llvm-cvtres/llvm-cvtres.cpp | 73 +- tools/llvm-cvtres/llvm-cvtres.h | 2 - tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 4 + tools/llvm-lto2/llvm-lto2.cpp | 2 + tools/llvm-mc/llvm-mc.cpp | 22 +- tools/llvm-nm/llvm-nm.cpp | 2 +- tools/llvm-objdump/MachODump.cpp | 7 +- .../llvm-pdbdump/C13DebugFragmentVisitor.cpp | 87 - tools/llvm-pdbdump/C13DebugFragmentVisitor.h | 60 - .../Analyze.cpp | 0 .../{llvm-pdbdump => llvm-pdbutil}/Analyze.h | 0 .../CMakeLists.txt | 5 +- .../CompactTypeDumpVisitor.cpp | 0 .../CompactTypeDumpVisitor.h | 0 tools/{llvm-pdbdump => llvm-pdbutil}/Diff.cpp | 2 +- tools/{llvm-pdbdump => llvm-pdbutil}/Diff.h | 0 .../LLVMBuild.txt | 4 +- .../LLVMOutputStyle.cpp | 308 +- .../LLVMOutputStyle.h | 0 .../LinePrinter.cpp | 2 +- .../LinePrinter.h | 0 .../OutputStyle.h | 0 .../PdbYaml.cpp | 0 .../{llvm-pdbdump => llvm-pdbutil}/PdbYaml.h | 0 .../PrettyBuiltinDumper.cpp | 2 +- .../PrettyBuiltinDumper.h | 0 .../PrettyClassDefinitionDumper.cpp | 2 +- .../PrettyClassDefinitionDumper.h | 0 .../PrettyClassLayoutGraphicalDumper.cpp | 2 +- .../PrettyClassLayoutGraphicalDumper.h | 0 .../PrettyCompilandDumper.cpp | 4 +- .../PrettyCompilandDumper.h | 2 +- .../PrettyEnumDumper.cpp | 2 +- .../PrettyEnumDumper.h | 0 .../PrettyExternalSymbolDumper.cpp | 0 .../PrettyExternalSymbolDumper.h | 0 .../PrettyFunctionDumper.cpp | 2 +- .../PrettyFunctionDumper.h | 0 .../PrettyTypeDumper.cpp | 128 +- .../PrettyTypeDumper.h | 0 .../PrettyTypedefDumper.cpp | 2 +- .../PrettyTypedefDumper.h | 2 +- .../PrettyVariableDumper.cpp | 2 +- .../PrettyVariableDumper.h | 0 .../StreamUtil.cpp | 0 .../StreamUtil.h | 0 .../YAMLOutputStyle.cpp | 75 +- .../YAMLOutputStyle.h | 0 .../fuzzer/CMakeLists.txt | 6 +- .../fuzzer/llvm-pdbutil-fuzzer.cpp} | 4 +- .../llvm-pdbutil.cpp} | 140 +- .../llvm-pdbutil.h} | 41 +- tools/llvm-readobj/CMakeLists.txt | 1 + tools/llvm-readobj/COFFDumper.cpp | 2 +- tools/llvm-readobj/COFFImportDumper.cpp | 2 +- tools/llvm-readobj/ELFDumper.cpp | 202 +- tools/llvm-readobj/LLVMBuild.txt | 2 +- tools/llvm-stress/llvm-stress.cpp | 3 + tools/llvm-xray/xray-extract.cc | 2 +- tools/obj2yaml/dwarf2yaml.cpp | 1 + tools/obj2yaml/macho2yaml.cpp | 2 +- tools/yaml2obj/yaml2elf.cpp | 2 +- tools/yaml2obj/yaml2macho.cpp | 6 +- unittests/ADT/DAGDeltaAlgorithmTest.cpp | 2 +- unittests/ADT/DeltaAlgorithmTest.cpp | 2 +- unittests/ADT/DenseMapTest.cpp | 2 +- unittests/ADT/DepthFirstIteratorTest.cpp | 2 +- unittests/ADT/FoldingSet.cpp | 2 +- unittests/ADT/HashingTest.cpp | 2 +- unittests/ADT/ImmutableMapTest.cpp | 2 +- unittests/ADT/ImmutableSetTest.cpp | 2 +- unittests/ADT/IteratorTest.cpp | 2 +- unittests/ADT/MapVectorTest.cpp | 2 +- unittests/ADT/OptionalTest.cpp | 2 +- unittests/ADT/PointerEmbeddedIntTest.cpp | 2 +- unittests/ADT/PointerIntPairTest.cpp | 2 +- unittests/ADT/PointerSumTypeTest.cpp | 2 +- unittests/ADT/PointerUnionTest.cpp | 2 +- unittests/ADT/PostOrderIteratorTest.cpp | 2 +- unittests/ADT/RangeAdapterTest.cpp | 2 +- unittests/ADT/ReverseIterationTest.cpp | 2 +- unittests/ADT/SCCIteratorTest.cpp | 2 +- unittests/ADT/SmallPtrSetTest.cpp | 4 +- unittests/ADT/SmallVectorTest.cpp | 63 +- unittests/ADT/TripleTest.cpp | 7 +- unittests/ADT/TwineTest.cpp | 2 +- unittests/ADT/VariadicFunctionTest.cpp | 4 +- .../Analysis/BranchProbabilityInfoTest.cpp | 2 +- unittests/Analysis/CFGTest.cpp | 2 +- unittests/Analysis/CMakeLists.txt | 1 + unittests/Analysis/GlobalsModRefTest.cpp | 55 + unittests/Analysis/LazyCallGraphTest.cpp | 2 +- unittests/Analysis/MemorySSA.cpp | 48 +- unittests/Analysis/ProfileSummaryInfoTest.cpp | 2 +- unittests/Analysis/ScalarEvolutionTest.cpp | 1 - unittests/Analysis/TBAATest.cpp | 2 +- unittests/Analysis/UnrollAnalyzer.cpp | 4 +- unittests/BinaryFormat/CMakeLists.txt | 9 + .../{Support => BinaryFormat}/DwarfTest.cpp | 5 +- unittests/BinaryFormat/TestFileMagic.cpp | 128 + unittests/Bitcode/BitstreamReaderTest.cpp | 2 +- unittests/Bitcode/BitstreamWriterTest.cpp | 2 +- unittests/CMakeLists.txt | 1 + unittests/CodeGen/DIEHashTest.cpp | 4 +- .../MachineInstrBundleIteratorTest.cpp | 2 +- .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 2 +- .../DebugInfo/DWARF/DWARFFormValueTest.cpp | 2 +- unittests/DebugInfo/DWARF/DwarfGenerator.cpp | 4 +- .../ExecutionEngine/ExecutionEngineTest.cpp | 2 +- .../ExecutionEngine/MCJIT/MCJITCAPITest.cpp | 2 +- .../MCJIT/MCJITMultipleModuleTest.cpp | 2 +- unittests/ExecutionEngine/MCJIT/MCJITTest.cpp | 2 +- .../Orc/CompileOnDemandLayerTest.cpp | 2 +- .../Orc/IndirectionUtilsTest.cpp | 2 +- .../Orc/LazyEmittingLayerTest.cpp | 2 +- .../Orc/ObjectTransformLayerTest.cpp | 2 +- unittests/ExecutionEngine/Orc/OrcCAPITest.cpp | 2 +- unittests/ExecutionEngine/Orc/OrcTestCommon.h | 4 +- unittests/ExecutionEngine/Orc/QueueChannel.h | 2 +- .../Orc/RTDyldObjectLinkingLayerTest.cpp | 4 +- unittests/IR/AsmWriterTest.cpp | 2 +- unittests/IR/ConstantRangeTest.cpp | 17 + unittests/IR/ConstantsTest.cpp | 4 +- unittests/IR/DebugTypeODRUniquingTest.cpp | 2 +- unittests/IR/DominatorTreeTest.cpp | 52 + unittests/IR/IRBuilderTest.cpp | 2 +- unittests/IR/MetadataTest.cpp | 2 +- unittests/IR/ModuleTest.cpp | 2 +- unittests/IR/PassManagerTest.cpp | 2 +- unittests/IR/PatternMatch.cpp | 2 +- unittests/IR/UserTest.cpp | 2 +- unittests/IR/ValueTest.cpp | 2 +- unittests/IR/VerifierTest.cpp | 4 +- unittests/Linker/LinkModulesTest.cpp | 4 +- unittests/MC/DwarfLineTables.cpp | 2 +- unittests/MC/StringTableBuilderTest.cpp | 2 +- unittests/MI/LiveIntervalTest.cpp | 15 +- unittests/ProfileData/CoverageMappingTest.cpp | 2 +- unittests/ProfileData/InstrProfTest.cpp | 2 +- unittests/ProfileData/SampleProfTest.cpp | 2 +- unittests/Support/ARMAttributeParser.cpp | 2 +- unittests/Support/BinaryStreamTest.cpp | 9 +- unittests/Support/CMakeLists.txt | 6 +- unittests/Support/CommandLineTest.cpp | 6 +- unittests/Support/CompressionTest.cpp | 2 +- unittests/Support/CrashRecoveryTest.cpp | 2 +- unittests/Support/DataExtractorTest.cpp | 2 +- .../Support/DynamicLibrary/CMakeLists.txt | 25 +- .../DynamicLibrary/DynamicLibraryTest.cpp | 53 +- .../Support/DynamicLibrary/PipSqueak.cxx | 42 +- unittests/Support/DynamicLibrary/PipSqueak.h | 13 + unittests/Support/EndianStreamTest.cpp | 2 +- unittests/Support/FileOutputBufferTest.cpp | 2 +- unittests/Support/FormatVariadicTest.cpp | 2 +- unittests/Support/LEB128Test.cpp | 4 +- unittests/Support/MD5Test.cpp | 2 +- unittests/Support/MathExtrasTest.cpp | 2 +- unittests/Support/MemoryBufferTest.cpp | 2 +- unittests/Support/MemoryTest.cpp | 2 +- unittests/Support/Path.cpp | 83 +- unittests/Support/ProgramTest.cpp | 4 +- unittests/Support/SpecialCaseListTest.cpp | 2 +- unittests/Support/SwapByteOrderTest.cpp | 2 +- unittests/Support/TarWriterTest.cpp | 2 +- unittests/Support/TargetParserTest.cpp | 2 +- unittests/Support/TrigramIndexTest.cpp | 2 +- unittests/Support/YAMLIOTest.cpp | 1 - unittests/Support/YAMLParserTest.cpp | 2 +- .../Support/formatted_raw_ostream_test.cpp | 2 +- unittests/Support/raw_ostream_test.cpp | 2 +- unittests/Support/raw_pwrite_stream_test.cpp | 2 +- unittests/Support/raw_sha1_ostream_test.cpp | 2 +- unittests/Target/AArch64/InstSizes.cpp | 24 +- .../Transforms/Scalar/LoopPassManagerTest.cpp | 2 +- unittests/Transforms/Utils/CMakeLists.txt | 1 + .../Transforms/Utils/FunctionComparator.cpp | 2 +- .../Transforms/Utils/OrderedInstructions.cpp | 65 + .../Transforms/Utils/ValueMapperTest.cpp | 2 +- utils/FileCheck/FileCheck.cpp | 2 +- utils/TableGen/CMakeLists.txt | 1 - utils/TableGen/TableGen.cpp | 6 - utils/TableGen/TableGenBackends.h | 1 - utils/TableGen/X86FoldTablesEmitter.cpp | 732 ---- utils/gdb-scripts/prettyprinters.py | 36 +- utils/git-svn/git-llvm | 5 + utils/opt-viewer/optrecord.py | 2 +- utils/release/test-release.sh | 29 +- 1717 files changed, 40056 insertions(+), 15366 deletions(-) create mode 100644 include/llvm/BinaryFormat/COFF.h rename include/llvm/{Support => BinaryFormat}/Dwarf.def (100%) rename include/llvm/{Support => BinaryFormat}/Dwarf.h (92%) rename include/llvm/{Support => BinaryFormat}/ELF.h (98%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/AArch64.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/AMDGPU.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/ARM.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/AVR.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/BPF.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Hexagon.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Lanai.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Mips.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/PowerPC.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/PowerPC64.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/RISCV.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Sparc.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/SystemZ.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/WebAssembly.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/i386.def (100%) rename include/llvm/{Support => BinaryFormat}/ELFRelocs/x86_64.def (100%) rename include/llvm/{Support => BinaryFormat}/MachO.def (100%) create mode 100644 include/llvm/BinaryFormat/MachO.h create mode 100644 include/llvm/BinaryFormat/Magic.h rename include/llvm/{Support => BinaryFormat}/Wasm.h (80%) rename include/llvm/{Support => BinaryFormat}/WasmRelocs/WebAssembly.def (100%) delete mode 100644 include/llvm/CodeGen/MachineFunctionInitializer.h create mode 100644 include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h create mode 100644 include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h create mode 100644 include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h rename {lib/Target/AMDGPU/MCTargetDesc => include/llvm/Support}/AMDGPUCodeObjectMetadata.h (98%) delete mode 100644 include/llvm/Support/COFF.h delete mode 100644 include/llvm/Support/MachO.h create mode 100644 include/llvm/Transforms/Utils/OrderedInstructions.h create mode 100644 lib/BinaryFormat/CMakeLists.txt rename lib/{Support => BinaryFormat}/Dwarf.cpp (77%) create mode 100644 lib/BinaryFormat/LLVMBuild.txt create mode 100644 lib/BinaryFormat/Magic.cpp create mode 100644 lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp create mode 100644 lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp create mode 100644 lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp create mode 100644 lib/Support/AMDGPUCodeObjectMetadata.cpp create mode 100644 lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp create mode 100644 lib/Transforms/Utils/OrderedInstructions.cpp create mode 100644 test/Analysis/BranchProbabilityInfo/libfunc_call.ll create mode 100644 test/BugPoint/unsymbolized.ll create mode 100644 test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll create mode 100644 test/CodeGen/AArch64/spill-undef.mir create mode 100644 test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir create mode 100644 test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll create mode 100644 test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll create mode 100644 test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir create mode 100644 test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir create mode 100644 test/CodeGen/ARM/clang-section.ll create mode 100644 test/CodeGen/Hexagon/common-gep-inbounds.ll create mode 100644 test/CodeGen/Hexagon/mux-undef.ll delete mode 100644 test/CodeGen/MIR/Generic/function-missing-machine-function.mir create mode 100644 test/CodeGen/MIR/X86/empty0.mir create mode 100644 test/CodeGen/MIR/X86/empty1.mir create mode 100644 test/CodeGen/MIR/X86/empty2.mir create mode 100644 test/CodeGen/MIR/X86/inline-asm.mir create mode 100644 test/CodeGen/MIR/X86/roundtrip.mir create mode 100644 test/CodeGen/Mips/cconv/vector.ll create mode 100644 test/CodeGen/Mips/pbqp-reserved-physreg.ll create mode 100644 test/CodeGen/PowerPC/BoolRetToIntTest-2.ll create mode 100644 test/CodeGen/PowerPC/testComparesinesc.ll create mode 100644 test/CodeGen/PowerPC/testComparesinesi.ll create mode 100644 test/CodeGen/PowerPC/testComparesinesll.ll create mode 100644 test/CodeGen/PowerPC/testComparesiness.ll create mode 100644 test/CodeGen/PowerPC/testComparesineuc.ll create mode 100644 test/CodeGen/PowerPC/testComparesineui.ll create mode 100644 test/CodeGen/PowerPC/testComparesineull.ll create mode 100644 test/CodeGen/PowerPC/testComparesineus.ll create mode 100644 test/CodeGen/PowerPC/testComparesllnesll.ll create mode 100644 test/CodeGen/PowerPC/testComparesllneull.ll create mode 100644 test/CodeGen/PowerPC/vec_int_ext.ll delete mode 100644 test/CodeGen/X86/GlobalISel/irtranslator-call.ll delete mode 100644 test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll create mode 100644 test/CodeGen/X86/mul-constant-result.ll create mode 100644 test/CodeGen/X86/pr32659.ll create mode 100644 test/CodeGen/X86/selectiondag-dominator.ll create mode 100644 test/CodeGen/X86/vector-shuffle-v48.ll create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets.s create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.o create mode 100644 test/DebugInfo/PDB/Inputs/debug-subsections.yaml delete mode 100644 test/DebugInfo/PDB/Inputs/simple-line-info.yaml create mode 100644 test/DebugInfo/PDB/pdbdump-debug-subsections.test delete mode 100644 test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test delete mode 100644 test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test create mode 100644 test/DebugInfo/dwarfdump-str-offsets-invalid.test create mode 100644 test/DebugInfo/dwarfdump-str-offsets.test create mode 100644 test/LTO/ARM/Inputs/thumb.ll create mode 100644 test/LTO/ARM/link-arm-and-thumb.ll create mode 100644 test/LTO/Resolution/X86/linker-redef.ll create mode 100644 test/Linker/Inputs/thumb.ll create mode 100644 test/Linker/link-arm-and-thumb.ll create mode 100644 test/MC/ARM/arm-thumb-tail-call.ll create mode 100644 test/MC/ARM/t2-modified-immediate-fixup-error1.s create mode 100644 test/MC/ARM/t2-modified-immediate-fixup-error2.s create mode 100644 test/MC/ARM/t2-modified-immediate-fixup.s create mode 100644 test/MC/AsmParser/empty-comment.s create mode 100644 test/MC/ELF/ARM/clang-section.s create mode 100644 test/MC/MachO/alias.s create mode 100644 test/Object/objc-imageinfo-coff.ll create mode 100644 test/Object/objc-imageinfo-elf.ll create mode 100644 test/Object/objc-imageinfo-macho.ll create mode 100644 test/Transforms/CodeGenPrepare/X86/memcmp.ll create mode 100644 test/Transforms/IRCE/correct-loop-info.ll create mode 100644 test/Transforms/IndVarSimplify/lftr_disabled.ll create mode 100644 test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll create mode 100644 test/Transforms/InstCombine/constant-fold-libfunc.ll create mode 100644 test/Transforms/InstSimplify/simplify-nested-bitcast.ll create mode 100644 test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll create mode 100644 test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll create mode 100644 test/Transforms/LowerExpectIntrinsic/PR33346.ll create mode 100644 test/Transforms/LowerTypeTests/simplify.ll create mode 100644 test/Transforms/NewGVN/pr33187.ll create mode 100644 test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll create mode 100644 test/Transforms/Sink/badloadsink.ll create mode 100644 test/tools/llvm-cvtres/Inputs/test_resource.obj.coff create mode 100644 test/tools/llvm-cvtres/object.test rename test/tools/llvm-cvtres/{resource.test => parse.test} (93%) create mode 100644 test/tools/llvm-dwarfdump/X86/brief.s create mode 100644 test/tools/llvm-dwarfdump/X86/lit.local.cfg delete mode 100755 test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803 create mode 100644 test/tools/llvm-readobj/amdgpu-elf-definitions.test delete mode 100644 test/tools/llvm-readobj/amdgpu-elf-defs.test delete mode 100644 tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp delete mode 100644 tools/llvm-pdbdump/C13DebugFragmentVisitor.h rename tools/{llvm-pdbdump => llvm-pdbutil}/Analyze.cpp (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/Analyze.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/CMakeLists.txt (89%) rename tools/{llvm-pdbdump => llvm-pdbutil}/CompactTypeDumpVisitor.cpp (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/CompactTypeDumpVisitor.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/Diff.cpp (99%) rename tools/{llvm-pdbdump => llvm-pdbutil}/Diff.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/LLVMBuild.txt (88%) rename tools/{llvm-pdbdump => llvm-pdbutil}/LLVMOutputStyle.cpp (75%) rename tools/{llvm-pdbdump => llvm-pdbutil}/LLVMOutputStyle.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/LinePrinter.cpp (99%) rename tools/{llvm-pdbdump => llvm-pdbutil}/LinePrinter.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/OutputStyle.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PdbYaml.cpp (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PdbYaml.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyBuiltinDumper.cpp (98%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyBuiltinDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassDefinitionDumper.cpp (99%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassDefinitionDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassLayoutGraphicalDumper.cpp (99%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassLayoutGraphicalDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyCompilandDumper.cpp (98%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyCompilandDumper.h (95%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyEnumDumper.cpp (98%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyEnumDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyExternalSymbolDumper.cpp (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyExternalSymbolDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyFunctionDumper.cpp (99%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyFunctionDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypeDumper.cpp (68%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypeDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypedefDumper.cpp (99%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypedefDumper.h (94%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyVariableDumper.cpp (99%) rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyVariableDumper.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/StreamUtil.cpp (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/StreamUtil.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/YAMLOutputStyle.cpp (84%) rename tools/{llvm-pdbdump => llvm-pdbutil}/YAMLOutputStyle.h (100%) rename tools/{llvm-pdbdump => llvm-pdbutil}/fuzzer/CMakeLists.txt (53%) rename tools/{llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp => llvm-pdbutil/fuzzer/llvm-pdbutil-fuzzer.cpp} (96%) rename tools/{llvm-pdbdump/llvm-pdbdump.cpp => llvm-pdbutil/llvm-pdbutil.cpp} (90%) rename tools/{llvm-pdbdump/llvm-pdbdump.h => llvm-pdbutil/llvm-pdbutil.h} (85%) create mode 100644 unittests/Analysis/GlobalsModRefTest.cpp create mode 100644 unittests/BinaryFormat/CMakeLists.txt rename unittests/{Support => BinaryFormat}/DwarfTest.cpp (97%) create mode 100644 unittests/BinaryFormat/TestFileMagic.cpp create mode 100644 unittests/Transforms/Utils/OrderedInstructions.cpp delete mode 100644 utils/TableGen/X86FoldTablesEmitter.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a5b96569f9c6..431785d3dd0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -385,6 +385,7 @@ else() option(LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY "Compile with -fmodules-local-submodule-visibility." ON) endif() option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF) +option(LLVM_ENABLE_CXX1Z "Compile with C++1z enabled." OFF) option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF) option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF) option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) @@ -853,7 +854,6 @@ if( LLVM_INCLUDE_UTILS ) add_subdirectory(utils/not) add_subdirectory(utils/llvm-lit) add_subdirectory(utils/yaml-bench) - add_subdirectory(utils/unittest) else() if ( LLVM_INCLUDE_TESTS ) message(FATAL_ERROR "Including tests when not building utils will not work. @@ -897,6 +897,10 @@ if( LLVM_INCLUDE_TESTS ) endif() add_subdirectory(test) add_subdirectory(unittests) + if( LLVM_INCLUDE_UTILS ) + add_subdirectory(utils/unittest) + endif() + if (WIN32) # This utility is used to prevent crashing tests from calling Dr. Watson on # Windows. diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index fe191beb3813..222097034307 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -611,6 +611,12 @@ func (t Type) StructElementTypes() []Type { } // Operations on array, pointer, and vector types (sequence types) +func (t Type) Subtypes() (ret []Type) { + ret = make([]Type, C.LLVMGetNumContainedTypes(t.C)) + C.LLVMGetSubtypes(t.C, llvmTypeRefPtr(&ret[0])) + return +} + func ArrayType(elementType Type, elementCount int) (t Type) { t.C = C.LLVMArrayType(elementType.C, C.unsigned(elementCount)) return diff --git a/bindings/go/llvm/ir_test.go b/bindings/go/llvm/ir_test.go index c823615a4293..325ee4890f4c 100644 --- a/bindings/go/llvm/ir_test.go +++ b/bindings/go/llvm/ir_test.go @@ -134,3 +134,29 @@ func TestDebugLoc(t *testing.T) { t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C) } } + +func TestSubtypes(t *testing.T) { + cont := NewContext() + defer cont.Dispose() + + int_pointer := PointerType(cont.Int32Type(), 0) + int_inner := int_pointer.Subtypes() + if len(int_inner) != 1 { + t.Errorf("Got size %d, though wanted 1") + } + if int_inner[0] != cont.Int32Type() { + t.Errorf("Expected int32 type") + } + + st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false) + st_inner := st_pointer.Subtypes() + if len(st_inner) != 2 { + t.Errorf("Got size %d, though wanted 2") + } + if st_inner[0] != cont.Int32Type() { + t.Errorf("Expected first struct field to be int32") + } + if st_inner[1] != cont.Int8Type() { + t.Errorf("Expected second struct field to be int8") + } +} diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml index 399fd2d27c20..6e8ca662ef67 100644 --- a/bindings/ocaml/llvm/llvm.ml +++ b/bindings/ocaml/llvm/llvm.ml @@ -459,6 +459,8 @@ external is_packed : lltype -> bool = "llvm_is_packed" external is_opaque : lltype -> bool = "llvm_is_opaque" (*--... Operations on pointer, vector, and array types .....................--*) + +external subtypes : lltype -> lltype array = "llvm_subtypes" external array_type : lltype -> int -> lltype = "llvm_array_type" external pointer_type : lltype -> lltype = "llvm_pointer_type" external qualified_pointer_type : lltype -> int -> lltype diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli index 4068126e2cbf..c422e78f5d2d 100644 --- a/bindings/ocaml/llvm/llvm.mli +++ b/bindings/ocaml/llvm/llvm.mli @@ -658,6 +658,9 @@ val is_opaque : lltype -> bool (** {7 Operations on pointer, vector, and array types} *) +(** [subtypes ty] returns [ty]'s subtypes *) +val subtypes : lltype -> lltype array + (** [array_type ty n] returns the array type containing [n] elements of type [ty]. See the method [llvm::ArrayType::get]. *) val array_type : lltype -> int -> lltype diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index af04ea25c8ab..4b6d1c5072bc 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -506,6 +506,20 @@ CAMLprim value llvm_is_opaque(LLVMTypeRef StructTy) { /*--... Operations on array, pointer, and vector types .....................--*/ +/* lltype -> lltype array */ +CAMLprim value llvm_subtypes(LLVMTypeRef Ty) { + CAMLparam0(); + CAMLlocal1(Arr); + + unsigned Size = LLVMGetNumContainedTypes(Ty); + + Arr = caml_alloc(Size, 0); + + LLVMGetSubtypes(Ty, (LLVMTypeRef *) Arr); + + CAMLreturn(Arr); +} + /* lltype -> int -> lltype */ CAMLprim LLVMTypeRef llvm_array_type(LLVMTypeRef ElementTy, value Count) { return LLVMArrayType(ElementTy, Int_val(Count)); diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 3dd16d51f0b7..c3325db11788 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -101,6 +101,10 @@ else() message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!") endif() +if( LLVM_REVERSE_ITERATION ) + set( LLVM_ENABLE_REVERSE_ITERATION 1 ) +endif() + if(WIN32) set(LLVM_HAVE_LINK_VERSION_SCRIPT 0) if(CYGWIN) @@ -381,6 +385,9 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) if (LLVM_ENABLE_CXX1Y) check_cxx_compiler_flag("-std=c++1y" CXX_SUPPORTS_CXX1Y) append_if(CXX_SUPPORTS_CXX1Y "-std=c++1y" CMAKE_CXX_FLAGS) + elseif(LLVM_ENABLE_CXX1Z) + check_cxx_compiler_flag("-std=c++1z" CXX_SUPPORTS_CXX1Z) + append_if(CXX_SUPPORTS_CXX1Z "-std=c++1z" CMAKE_CXX_FLAGS) else() check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11) if (CXX_SUPPORTS_CXX11) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index da0858e54d44..17ae1c9e7717 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -30,19 +30,43 @@ function(tablegen project ofn) endif() endif() - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp - # Generate tablegen output in a temporary file. - COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} - ${LLVM_TABLEGEN_FLAGS} - ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} - -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp - # The file in LLVM_TARGET_DEFINITIONS may be not in the current - # directory and local_tds may not contain it, so we must - # explicitly list it here: - DEPENDS ${${project}_TABLEGEN_TARGET} ${local_tds} ${global_tds} - ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} - COMMENT "Building ${ofn}..." - ) + # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list + # (both the target and the file) to have .inc files rebuilt on + # a tablegen change, as cmake does not propagate file-level dependencies + # of custom targets. See the following ticket for more information: + # https://cmake.org/Bug/view.php?id=15858 + # We could always have just one dependency on both the target and + # the file, but these 2 cases would produce cleaner cmake files. + if (${${project}_TABLEGEN_TARGET} STREQUAL ${${project}_TABLEGEN_EXE}) + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + # Generate tablegen output in a temporary file. + COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} + ${LLVM_TABLEGEN_FLAGS} + ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} + -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + # The file in LLVM_TARGET_DEFINITIONS may be not in the current + # directory and local_tds may not contain it, so we must + # explicitly list it here: + DEPENDS ${${project}_TABLEGEN_TARGET} ${local_tds} ${global_tds} + ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} + COMMENT "Building ${ofn}..." + ) + else() + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + # Generate tablegen output in a temporary file. + COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} + ${LLVM_TABLEGEN_FLAGS} + ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} + -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + # The file in LLVM_TARGET_DEFINITIONS may be not in the current + # directory and local_tds may not contain it, so we must + # explicitly list it here: + DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE} + ${local_tds} ${global_tds} + ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} + COMMENT "Building ${ofn}..." + ) + endif() add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} # Only update the real output file if there are any differences. # This prevents recompilation of all the files depending on it if there diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 81c067b317d3..caa697ca28cd 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -1,109 +1,3441 @@ -============================== -User Guide for AMDGPU Back-end -============================== +============================= +User Guide for AMDGPU Backend +============================= + +.. contents:: + :local: Introduction ============ -The AMDGPU back-end provides ISA code generation for AMD GPUs, starting with -the R600 family up until the current Volcanic Islands (GCN Gen 3). +The AMDGPU backend provides ISA code generation for AMD GPUs, starting with the +R600 family up until the current GCN families. It lives in the +``lib/Target/AMDGPU`` directory. -Refer to `AMDGPU section in Architecture & Platform Information for Compiler Writers `_ -for additional documentation. +LLVM +==== -Conventions -=========== +.. _amdgpu-target-triples: + +Target Triples +-------------- + +Use the ``clang -target ---`` option to +specify the target triple: + + .. table:: AMDGPU Target Triples + :name: amdgpu-target-triples-table + + ============ ======== ========= =========== + Architecture Vendor OS Environment + ============ ======== ========= =========== + r600 amd + amdgcn amd + amdgcn amd amdhsa + amdgcn amd amdhsa opencl + amdgcn amd amdhsa amdgizcl + amdgcn amd amdhsa amdgiz + amdgcn amd amdhsa hcc + ============ ======== ========= =========== + +``r600-amd--`` + Supports AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders executed on + the MESA runtime. + +``amdgcn-amd--`` + Supports AMD GPUs GCN 6 onwards for graphics and compute shaders executed on + the MESA runtime. + +``amdgcn-amd-amdhsa-`` + Supports AMD GCN GPUs GFX6 onwards for compute kernels executed on HSA [HSA]_ + compatible runtimes such as AMD's ROCm [AMD-ROCm]_. + +``amdgcn-amd-amdhsa-opencl`` + Supports AMD GCN GPUs GFX6 onwards for OpenCL compute kernels executed on HSA + [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See + :ref:`amdgpu-opencl`. + +``amdgcn-amd-amdhsa-amdgizcl`` + Same as ``amdgcn-amd-amdhsa-opencl`` except a different address space mapping + is used (see :ref:`amdgpu-address-spaces`). + +``amdgcn-amd-amdhsa-amdgiz`` + Same as ``amdgcn-amd-amdhsa-`` except a different address space mapping is + used (see :ref:`amdgpu-address-spaces`). + +``amdgcn-amd-amdhsa-hcc`` + Supports AMD GCN GPUs GFX6 onwards for AMD HC language compute kernels + executed on HSA [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See + :ref:`amdgpu-hcc`. + +.. _amdgpu-processors: + +Processors +---------- + +Use the ``clang -mcpu `` option to specify the AMD GPU processor. The +names from both the *Processor* and *Alternative Processor* can be used. + + .. table:: AMDGPU Processors + :name: amdgpu-processors-table + + ========== =========== ============ ===== ======= ================== + Processor Alternative Target dGPU/ Runtime Example + Processor Triple APU Support Products + Architecture + ========== =========== ============ ===== ======= ================== + **R600** [AMD-R6xx]_ + -------------------------------------------------------------------- + r600 r600 dGPU + r630 r600 dGPU + rs880 r600 dGPU + rv670 r600 dGPU + **R700** [AMD-R7xx]_ + -------------------------------------------------------------------- + rv710 r600 dGPU + rv730 r600 dGPU + rv770 r600 dGPU + **Evergreen** [AMD-Evergreen]_ + -------------------------------------------------------------------- + cedar r600 dGPU + redwood r600 dGPU + sumo r600 dGPU + juniper r600 dGPU + cypress r600 dGPU + **Northern Islands** [AMD-Cayman-Trinity]_ + -------------------------------------------------------------------- + barts r600 dGPU + turks r600 dGPU + caicos r600 dGPU + cayman r600 dGPU + **GCN GFX6 (Southern Islands (SI))** [AMD-Souther-Islands]_ + -------------------------------------------------------------------- + gfx600 - SI amdgcn dGPU + - tahiti + gfx601 - pitcairn amdgcn dGPU + - verde + - oland + - hainan + **GCN GFX7 (Sea Islands (CI))** [AMD-Sea-Islands]_ + -------------------------------------------------------------------- + gfx700 - bonaire amdgcn dGPU - Radeon HD 7790 + - Radeon HD 8770 + - R7 260 + - R7 260X + \ - kaveri amdgcn APU - A6-7000 + - A6 Pro-7050B + - A8-7100 + - A8 Pro-7150B + - A10-7300 + - A10 Pro-7350B + - FX-7500 + - A8-7200P + - A10-7400P + - FX-7600P + gfx701 - hawaii amdgcn dGPU ROCm - FirePro W8100 + - FirePro W9100 + - FirePro S9150 + - FirePro S9170 + gfx702 dGPU ROCm - Radeon R9 290 + - Radeon R9 290x + - Radeon R390 + - Radeon R390x + gfx703 - kabini amdgcn APU - E1-2100 + - mullins - E1-2200 + - E1-2500 + - E2-3000 + - E2-3800 + - A4-5000 + - A4-5100 + - A6-5200 + - A4 Pro-3340B + **GCN GFX8 (Volcanic Islands (VI))** [AMD-Volcanic-Islands]_ + -------------------------------------------------------------------- + gfx800 - iceland amdgcn dGPU - FirePro S7150 + - FirePro S7100 + - FirePro W7100 + - Radeon R285 + - Radeon R9 380 + - Radeon R9 385 + - Mobile FirePro + M7170 + gfx801 - carrizo amdgcn APU - A6-8500P + - Pro A6-8500B + - A8-8600P + - Pro A8-8600B + - FX-8800P + - Pro A12-8800B + \ amdgcn APU ROCm - A10-8700P + - Pro A10-8700B + - A10-8780P + \ amdgcn APU - A10-9600P + - A10-9630P + - A12-9700P + - A12-9730P + - FX-9800P + - FX-9830P + \ amdgcn APU - E2-9010 + - A6-9210 + - A9-9410 + gfx802 - tonga amdgcn dGPU ROCm Same as gfx800 + gfx803 - fiji amdgcn dGPU ROCm - Radeon R9 Nano + - Radeon R9 Fury + - Radeon R9 FuryX + - Radeon Pro Duo + - FirePro S9300x2 + \ - polaris10 amdgcn dGPU ROCm - Radeon RX 470 + - Radeon RX 480 + \ - polaris11 amdgcn dGPU ROCm - Radeon RX 460 + gfx804 amdgcn dGPU Same as gfx803 + gfx810 - stoney amdgcn APU + **GCN GFX9** + -------------------------------------------------------------------- + gfx900 amdgcn dGPU - FirePro W9500 + - FirePro S9500 + - FirePro S9500x2 + gfx901 amdgcn dGPU ROCm Same as gfx900 + except XNACK is + enabled + gfx902 amdgcn APU *TBA* + + .. TODO + Add product + names. + gfx903 amdgcn APU Same as gfx902 + except XNACK is + enabled + ========== =========== ============ ===== ======= ================== + +.. _amdgpu-address-spaces: Address Spaces -------------- -The AMDGPU back-end uses the following address space mapping: +The AMDGPU backend uses the following address space mappings. - ================== =================== ============== - LLVM Address Space DWARF Address Space Memory Space - ================== =================== ============== - 0 1 Private - 1 N/A Global - 2 N/A Constant - 3 2 Local - 4 N/A Generic (Flat) - 5 N/A Region - ================== =================== ============== +The memory space names used in the table, aside from the region memory space, is +from the OpenCL standard. -The terminology in the table, aside from the region memory space, is from the -OpenCL standard. +LLVM Address Space number is used throughout LLVM (for example, in LLVM IR). -LLVM Address Space is used throughout LLVM (for example, in LLVM IR). DWARF -Address Space is emitted in DWARF, and is used by tools, such as debugger, -profiler and others. + .. table:: Address Space Mapping + :name: amdgpu-address-space-mapping-table + + ================== ================= ================= ================= ================= + LLVM Address Space Memory Space + ------------------ ----------------------------------------------------------------------- + \ Current Default amdgiz/amdgizcl hcc Future Default + ================== ================= ================= ================= ================= + 0 Private (Scratch) Generic (Flat) Generic (Flat) Generic (Flat) + 1 Global Global Global Global + 2 Constant Constant Constant Region (GDS) + 3 Local (group/LDS) Local (group/LDS) Local (group/LDS) Local (group/LDS) + 4 Generic (Flat) Region (GDS) Region (GDS) Constant + 5 Region (GDS) Private (Scratch) Private (Scratch) Private (Scratch) + ================== ================= ================= ================= ================= + +Current Default + This is the current default address space mapping used for all languages + except hcc. This will shortly be deprecated. + +amdgiz/amdgizcl + This is the current address space mapping used when ``amdgiz`` or ``amdgizcl`` + is specified as the target triple environment value. + +hcc + This is the current address space mapping used when ``hcc`` is specified as + the target triple environment value.This will shortly be deprecated. + +Future Default + This will shortly be the only address space mapping for all languages using + AMDGPU backend. + +.. _amdgpu-memory-scopes: + +Memory Scopes +------------- + +This section provides LLVM memory synchronization scopes supported by the AMDGPU +backend memory model when the target triple OS is ``amdhsa`` (see +:ref:`amdgpu-amdhsa-memory-model` and :ref:`amdgpu-target-triples`). + +The memory model supported is based on the HSA memory model [HSA]_ which is +based in turn on HRF-indirect with scope inclusion [HRF]_. The happens-before +relation is transitive over the synchonizes-with relation independent of scope, +and synchonizes-with allows the memory scope instances to be inclusive (see +table :ref:`amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table`). + +This is different to the OpenCL [OpenCL]_ memory model which does not have scope +inclusion and requires the memory scopes to exactly match. However, this +is conservatively correct for OpenCL. + + .. table:: AMDHSA LLVM Sync Scopes for AMDHSA + :name: amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table + + ================ ========================================================== + LLVM Sync Scope Description + ================ ========================================================== + *none* The default: ``system``. + + Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``. + - ``agent`` and executed by a thread on the same agent. + - ``workgroup`` and executed by a thread in the same + workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``agent`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system`` or ``agent`` and executed by a thread on the + same agent. + - ``workgroup`` and executed by a thread in the same + workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``workgroup`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``, ``agent`` or ``workgroup`` and executed by a + thread in the same workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``wavefront`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``, ``agent``, ``workgroup`` or ``wavefront`` + and executed by a thread in the same wavefront. + + ``singlethread`` Only synchronizes with, and participates in modification + and seq_cst total orderings with, other operations (except + image operations) running in the same thread for all + address spaces (for example, in signal handlers). + ================ ========================================================== + +AMDGPU Intrinsics +----------------- + +The AMDGPU backend implements the following intrinsics. + +*This section is WIP.* + +.. TODO + List AMDGPU intrinsics + +Code Object +=========== + +The AMDGPU backend generates a standard ELF [ELF]_ relocatable code object that +can be linked by ``lld`` to produce a standard ELF shared code object which can +be loaded and executed on an AMDGPU target. + +Header +------ + +The AMDGPU backend uses the following ELF header: + + .. table:: AMDGPU ELF Header + :name: amdgpu-elf-header-table + + ========================== ========================= + Field Value + ========================== ========================= + ``e_ident[EI_CLASS]`` ``ELFCLASS64`` + ``e_ident[EI_DATA]`` ``ELFDATA2LSB`` + ``e_ident[EI_OSABI]`` ``ELFOSABI_AMDGPU_HSA`` + ``e_ident[EI_ABIVERSION]`` ``ELFABIVERSION_AMDGPU_HSA`` + ``e_type`` ``ET_REL`` or ``ET_DYN`` + ``e_machine`` ``EM_AMDGPU`` + ``e_entry`` 0 + ``e_flags`` 0 + ========================== ========================= + +.. + + .. table:: AMDGPU ELF Header Enumeration Values + :name: amdgpu-elf-header-enumeration-values-table + + ============================ ===== + Name Value + ============================ ===== + ``EM_AMDGPU`` 224 + ``ELFOSABI_AMDGPU_HSA`` 64 + ``ELFABIVERSION_AMDGPU_HSA`` 1 + ============================ ===== + +``e_ident[EI_CLASS]`` + The ELF class is always ``ELFCLASS64``. The AMDGPU backend only supports 64 bit + applications. + +``e_ident[EI_DATA]`` + All AMDGPU targets use ELFDATA2LSB for little-endian byte ordering. + +``e_ident[EI_OSABI]`` + The AMD GPU architecture specific OS ABI of ``ELFOSABI_AMDGPU_HSA`` is used to + specify that the code object conforms to the AMD HSA runtime ABI [HSA]_. + +``e_ident[EI_ABIVERSION]`` + The AMD GPU architecture specific OS ABI version of + ``ELFABIVERSION_AMDGPU_HSA`` is used to specify the version of AMD HSA runtime + ABI to which the code object conforms. + +``e_type`` + Can be one of the following values: + + + ``ET_REL`` + The type produced by the AMD GPU backend compiler as it is relocatable code + object. + + ``ET_DYN`` + The type produced by the linker as it is a shared code object. + + The AMD HSA runtime loader requires a ``ET_DYN`` code object. + +``e_machine`` + The value ``EM_AMDGPU`` is used for the machine for all members of the AMD GPU + architecture family. The specific member is specified in the + ``NT_AMD_AMDGPU_ISA`` entry in the ``.note`` section (see + :ref:`amdgpu-note-records`). + +``e_entry`` + The entry point is 0 as the entry points for individual kernels must be + selected in order to invoke them through AQL packets. + +``e_flags`` + The value is 0 as no flags are used. + +Sections +-------- + +An AMDGPU target ELF code object has the standard ELF sections which include: + + .. table:: AMDGPU ELF Sections + :name: amdgpu-elf-sections-table + + ================== ================ ================================= + Name Type Attributes + ================== ================ ================================= + ``.bss`` ``SHT_NOBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.data`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.debug_``\ *\** ``SHT_PROGBITS`` *none* + ``.dynamic`` ``SHT_DYNAMIC`` ``SHF_ALLOC`` + ``.dynstr`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.dynsym`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.got`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.hash`` ``SHT_HASH`` ``SHF_ALLOC`` + ``.note`` ``SHT_NOTE`` *none* + ``.rela``\ *name* ``SHT_RELA`` *none* + ``.rela.dyn`` ``SHT_RELA`` *none* + ``.rodata`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.shstrtab`` ``SHT_STRTAB`` *none* + ``.strtab`` ``SHT_STRTAB`` *none* + ``.symtab`` ``SHT_SYMTAB`` *none* + ``.text`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_EXECINSTR`` + ================== ================ ================================= + +These sections have their standard meanings (see [ELF]_) and are only generated +if needed. + +``.debug``\ *\** + The standard DWARF sections. See :ref:`amdgpu-dwarf` for information on the + DWARF produced by the AMDGPU backend. + +``.dynamic``, ``.dynstr``, ``.dynstr``, ``.hash`` + The standard sections used by a dynamic loader. + +``.note`` + See :ref:`amdgpu-note-records` for the note records supported by the AMDGPU + backend. + +``.rela``\ *name*, ``.rela.dyn`` + For relocatable code objects, *name* is the name of the section that the + relocation records apply. For example, ``.rela.text`` is the section name for + relocation records associated with the ``.text`` section. + + For linked shared code objects, ``.rela.dyn`` contains all the relocation + records from each of the relocatable code object's ``.rela``\ *name* sections. + + See :ref:`amdgpu-relocation-records` for the relocation records supported by + the AMDGPU backend. + +``.text`` + The executable machine code for the kernels and functions they call. Generated + as position independent code. See :ref:`amdgpu-code-conventions` for + information on conventions used in the isa generation. + +.. _amdgpu-note-records: + +Note Records +------------ + +As required by ``ELFCLASS64``, minimal zero byte padding must be generated after +the ``name`` field to ensure the ``desc`` field is 4 byte aligned. In addition, +minimal zero byte padding must be generated to ensure the ``desc`` field size is +a multiple of 4 bytes. The ``sh_addralign`` field of the ``.note`` section must +be at least 4 to indicate at least 8 byte alignment. + +The AMDGPU backend code object uses the following ELF note records in the +``.note`` section. The *Description* column specifies the layout of the note +record’s ``desc`` field. All fields are consecutive bytes. Note records with +variable size strings have a corresponding ``*_size`` field that specifies the +number of bytes, including the terminating null character, in the string. The +string(s) come immediately after the preceding fields. + +Additional note records can be present. + + .. table:: AMDGPU ELF Note Records + :name: amdgpu-elf-note-records-table + + ===== ========================== ========================================== + Name Type Description + ===== ========================== ========================================== + "AMD" ``NT_AMD_AMDGPU_METADATA`` + "AMD" ``NT_AMD_AMDGPU_ISA`` + ===== ========================== ========================================== + +.. + + .. table:: AMDGPU ELF Note Record Enumeration Values + :name: amdgpu-elf-note-record-enumeration-values-table + + ============================= ===== + Name Value + ============================= ===== + *reserved* 0-9 + ``NT_AMD_AMDGPU_METADATA`` 10 + ``NT_AMD_AMDGPU_ISA`` 11 + ============================= ===== + +``NT_AMD_AMDGPU_ISA`` + Specifies the instruction set architecture used by the machine code contained + in the code object. + + This note record is required for code objects containing machine code for + processors matching the ``amdgcn`` architecture in table + :ref:`amdgpu-processors`. + + The null terminated string has the following syntax: + + *architecture*\ ``-``\ *vendor*\ ``-``\ *os*\ ``-``\ *environment*\ ``-``\ *processor* + + where: + + *architecture* + The architecture from table :ref:`amdgpu-target-triples-table`. + + This is always ``amdgcn`` when the target triple OS is ``amdhsa`` (see + :ref:`amdgpu-target-triples`). + + *vendor* + The vendor from table :ref:`amdgpu-target-triples-table`. + + For the AMDGPU backend this is always ``amd``. + + *os* + The OS from table :ref:`amdgpu-target-triples-table`. + + *environment* + An environment from table :ref:`amdgpu-target-triples-table`, or blank if + the environment has no affect on the execution of the code object. + + For the AMDGPU backend this is currently always blank. + *processor* + The processor from table :ref:`amdgpu-processors-table`. + + For example: + + ``amdgcn-amd-amdhsa--gfx901`` + +``NT_AMD_AMDGPU_METADATA`` + Specifies extensible metadata associated with the code object. See + :ref:`amdgpu-code-object-metadata` for the syntax of the code object metadata + string. + + This note record is required and must contain the minimum information + necessary to support the ROCM kernel queries. For example, the segment sizes + needed in a dispatch packet. In addition, a high level language runtime may + require other information to be included. For example, the AMD OpenCL runtime + records kernel argument information. + + .. TODO + Is the string null terminated? It probably should not if YAML allows it to + contain null characters, otherwise it should be. + +.. _amdgpu-code-object-metadata: + +Code Object Metadata +-------------------- + +The code object metadata is specified by the ``NT_AMD_AMDHSA_METADATA`` note +record (see :ref:`amdgpu-note-records`). + +The metadata is specified as a YAML formated string (see [YAML]_ and +:doc:`YamlIO`). + +The metadata is represented as a single YAML document comprised of the mapping +defined in table :ref:`amdgpu-amdhsa-code-object-metadata-mapping-table` and +referenced tables. + +For boolean values, the string values of ``false`` and ``true`` are used for +false and true respectively. + +Additional information can be added to the mappings. To avoid conflicts, any +non-AMD key names should be prefixed by "*vendor-name*.". + + .. table:: AMDHSA Code Object Metadata Mapping + :name: amdgpu-amdhsa-code-object-metadata-mapping-table + + ========== ============== ========= ======================================= + String Key Value Type Required? Description + ========== ============== ========= ======================================= + "Version" sequence of Required - The first integer is the major + 2 integers version. Currently 1. + - The second integer is the minor + version. Currently 0. + "Printf" sequence of Each string is encoded information + strings about a printf function call. The + encoded information is organized as + fields separated by colon (':'): + + ``ID:N:S[0]:S[1]:...:S[N-1]:FormatString`` + + where: + + ``ID`` + A 32 bit integer as a unique id for + each printf function call + + ``N`` + A 32 bit integer equal to the number + of arguments of printf function call + minus 1 + + ``S[i]`` (where i = 0, 1, ... , N-1) + 32 bit integers for the size in bytes + of the i-th FormatString argument of + the printf function call + + FormatString + The format string passed to the + printf function call. + "Kernels" sequence of Required Sequence of the mappings for each + mapping kernel in the code object. See + :ref:`amdgpu-amdhsa-code-object-kernel-metadata-mapping-table` + for the definition of the mapping. + ========== ============== ========= ======================================= + +.. + + .. table:: AMDHSA Code Object Kernel Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-metadata-mapping-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Required Source name of the kernel. + "SymbolName" string Required Name of the kernel + descriptor ELF symbol. + "Language" string Source language of the kernel. + Values include: + + - "OpenCL C" + - "OpenCL C++" + - "HCC" + - "OpenMP" + + "LanguageVersion" sequence of - The first integer is the major + 2 integers version. + - The second integer is the + minor version. + "Attrs" mapping Mapping of kernel attributes. + See + :ref:`amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table` + for the mapping definition. + "Arguments" sequence of Sequence of mappings of the + mapping kernel arguments. See + :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table` + for the definition of the mapping. + "CodeProps" mapping Mapping of properties related to + the kernel code. See + :ref:`amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table` + for the mapping definition. + "DebugProps" mapping Mapping of properties related to + the kernel debugging. See + :ref:`amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table` + for the mapping definition. + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object Kernel Attribute Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table + + =================== ============== ========= ============================== + String Key Value Type Required? Description + =================== ============== ========= ============================== + "ReqdWorkGroupSize" sequence of The dispatch work-group size + 3 integers X, Y, Z must correspond to the + specified values. + + Corresponds to the OpenCL + ``reqd_work_group_size`` + attribute. + "WorkGroupSizeHint" sequence of The dispatch work-group size + 3 integers X, Y, Z is likely to be the + specified values. + + Corresponds to the OpenCL + ``work_group_size_hint`` + attribute. + "VecTypeHint" string The name of a scalar or vector + type. + + Corresponds to the OpenCL + ``vec_type_hint`` attribute. + =================== ============== ========= ============================== + +.. + + .. table:: AMDHSA Code Object Kernel Argument Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Kernel argument name. + "TypeName" string Kernel argument type name. + "Size" integer Required Kernel argument size in bytes. + "Align" integer Required Kernel argument alignment in + bytes. Must be a power of two. + "ValueKind" string Required Kernel argument kind that + specifies how to set up the + corresponding argument. + Values include: + + "ByValue" + The argument is copied + directly into the kernarg. + + "GlobalBuffer" + A global address space pointer + to the buffer data is passed + in the kernarg. + + "DynamicSharedPointer" + A group address space pointer + to dynamically allocated LDS + is passed in the kernarg. + + "Sampler" + A global address space + pointer to a S# is passed in + the kernarg. + + "Image" + A global address space + pointer to a T# is passed in + the kernarg. + + "Pipe" + A global address space pointer + to an OpenCL pipe is passed in + the kernarg. + + "Queue" + A global address space pointer + to an OpenCL device enqueue + queue is passed in the + kernarg. + + "HiddenGlobalOffsetX" + The OpenCL grid dispatch + global offset for the X + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetY" + The OpenCL grid dispatch + global offset for the Y + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetZ" + The OpenCL grid dispatch + global offset for the Z + dimension is passed in the + kernarg. + + "HiddenNone" + An argument that is not used + by the kernel. Space needs to + be left for it, but it does + not need to be set up. + + "HiddenPrintfBuffer" + A global address space pointer + to the runtime printf buffer + is passed in kernarg. + + "HiddenDefaultQueue" + A global address space pointer + to the OpenCL device enqueue + queue that should be used by + the kernel by default is + passed in the kernarg. + + "HiddenCompletionAction" + *TBD* + + .. TODO + Add description. + + "ValueType" string Required Kernel argument value type. Only + present if "ValueKind" is + "ByValue". For vector data + types, the value is for the + element type. Values include: + + - "Struct" + - "I8" + - "U8" + - "I16" + - "U16" + - "F16" + - "I32" + - "U32" + - "F32" + - "I64" + - "U64" + - "F64" + + .. TODO + How can it be determined if a + vector type, and what size + vector? + "PointeeAlign" integer Alignment in bytes of pointee + type for pointer type kernel + argument. Must be a power + of 2. Only present if + "ValueKind" is + "DynamicSharedPointer". + "AddrSpaceQual" string Kernel argument address space + qualifier. Only present if + "ValueKind" is "GlobalBuffer" or + "DynamicSharedPointer". Values + are: + + - "Private" + - "Global" + - "Constant" + - "Local" + - "Generic" + - "Region" + + .. TODO + Is GlobalBuffer only Global + or Constant? Is + DynamicSharedPointer always + Local? Can HCC allow Generic? + How can Private or Region + ever happen? + "AccQual" string Kernel argument access + qualifier. Only present if + "ValueKind" is "Image" or + "Pipe". Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + .. TODO + Does this apply to + GlobalBuffer? + "ActualAcc" string The actual memory accesses + performed by the kernel on the + kernel argument. Only present if + "ValueKind" is "GlobalBuffer", + "Image", or "Pipe". This may be + more restrictive than indicated + by "AccQual" to reflect what the + kernel actual does. If not + present then the runtime must + assume what is implied by + "AccQual" and "IsConst". Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + "IsConst" boolean Indicates if the kernel argument + is const qualified. Only present + if "ValueKind" is + "GlobalBuffer". + + "IsRestrict" boolean Indicates if the kernel argument + is restrict qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsVolatile" boolean Indicates if the kernel argument + is volatile qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsPipe" boolean Indicates if the kernel argument + is pipe qualified. Only present + if "ValueKind" is "Pipe". + + .. TODO + Can GlobalBuffer be pipe + qualified? + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object Kernel Code Properties Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table + + ============================ ============== ========= ===================== + String Key Value Type Required? Description + ============================ ============== ========= ===================== + "KernargSegmentSize" integer Required The size in bytes of + the kernarg segment + that holds the values + of the arguments to + the kernel. + "GroupSegmentFixedSize" integer Required The amount of group + segment memory + required by a + work-group in + bytes. This does not + include any + dynamically allocated + group segment memory + that may be added + when the kernel is + dispatched. + "PrivateSegmentFixedSize" integer Required The amount of fixed + private address space + memory required for a + work-item in + bytes. If + IsDynamicCallstack + is 1 then additional + space must be added + to this value for the + call stack. + "KernargSegmentAlign" integer Required The maximum byte + alignment of + arguments in the + kernarg segment. Must + be a power of 2. + "WavefrontSize" integer Required Wavefront size. Must + be a power of 2. + "NumSGPRs" integer Number of scalar + registers used by a + wavefront for + GFX6-GFX9. This + includes the special + SGPRs for VCC, Flat + Scratch (GFX7-GFX9) + and XNACK (for + GFX8-GFX9). It does + not include the 16 + SGPR added if a trap + handler is + enabled. It is not + rounded up to the + allocation + granularity. + "NumVGPRs" integer Number of vector + registers used by + each work-item for + GFX6-GFX9 + "MaxFlatWorkgroupSize" integer Maximum flat + work-group size + supported by the + kernel in work-items. + "IsDynamicCallStack" boolean Indicates if the + generated machine + code is using a + dynamically sized + call stack. + "IsXNACKEnabled" boolean Indicates if the + generated machine + code is capable of + supporting XNACK. + ============================ ============== ========= ===================== + +.. + + .. table:: AMDHSA Code Object Kernel Debug Properties Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table + + =================================== ============== ========= ============== + String Key Value Type Required? Description + =================================== ============== ========= ============== + "DebuggerABIVersion" string + "ReservedNumVGPRs" integer + "ReservedFirstVGPR" integer + "PrivateSegmentBufferSGPR" integer + "WavefrontPrivateSegmentOffsetSGPR" integer + =================================== ============== ========= ============== + +.. TODO + Plan to remove the debug properties metadata. + +.. _amdgpu-symbols: + +Symbols +------- + +Symbols include the following: + + .. table:: AMDGPU ELF Symbols + :name: amdgpu-elf-symbols-table + + ===================== ============== ============= ================== + Name Type Section Description + ===================== ============== ============= ================== + *link-name* ``STT_OBJECT`` - ``.data`` Global variable + - ``.rodata`` + - ``.bss`` + *link-name*\ ``@kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor + *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point + ===================== ============== ============= ================== + +Global variable + Global variables both used and defined by the compilation unit. + + If the symbol is defined in the compilation unit then it is allocated in the + appropriate section according to if it has initialized data or is readonly. + + If the symbol is external then its section is ``STN_UNDEF`` and the loader + will resolve relocations using the defintion provided by another code object + or explicitly defined by the runtime. + + All global symbols, whether defined in the compilation unit or external, are + accessed by the machine code indirectly throught a GOT table entry. This + allows them to be preemptable. The GOT table is only supported when the target + triple OS is ``amdhsa`` (see :ref:`amdgpu-target-triples`). + + .. TODO + Add description of linked shared object symbols. Seems undefined symbols + are marked as STT_NOTYPE. + +Kernel descriptor + Every HSA kernel has an associated kernel descriptor. It is the address of the + kernel descriptor that is used in the AQL dispatch packet used to invoke the + kernel, not the kernel entry point. The layout of the HSA kernel descriptor is + defined in :ref:`amdgpu-amdhsa-kernel-descriptor`. + +Kernel entry point + Every HSA kernel also has a symbol for its machine code entry point. + +.. _amdgpu-relocation-records: + +Relocation Records +------------------ + +AMDGPU backend generates ``Elf64_Rela`` relocation records. Supported +relocatable fields are: + +``word32`` + This specifies a 32-bit field occupying 4 bytes with arbitrary byte + alignment. These values use the same byte order as other word values in the + AMD GPU architecture. + +``word64`` + This specifies a 64-bit field occupying 8 bytes with arbitrary byte + alignment. These values use the same byte order as other word values in the + AMD GPU architecture. + +Following notations are used for specifying relocation calculations: + +**A** + Represents the addend used to compute the value of the relocatable field. + +**G** + Represents the offset into the global offset table at which the relocation + entry’s symbol will reside during execution. + +**GOT** + Represents the address of the global offset table. + +**P** + Represents the place (section offset for ``et_rel`` or address for ``et_dyn``) + of the storage unit being relocated (computed using ``r_offset``). + +**S** + Represents the value of the symbol whose index resides in the relocation + entry. + +The following relocation types are supported: + + .. table:: AMDGPU ELF Relocation Records + :name: amdgpu-elf-relocation-records-table + + ========================== ===== ========== ============================== + Relocation Type Value Field Calculation + ========================== ===== ========== ============================== + ``R_AMDGPU_NONE`` 0 *none* *none* + ``R_AMDGPU_ABS32_LO`` 1 ``word32`` (S + A) & 0xFFFFFFFF + ``R_AMDGPU_ABS32_HI`` 2 ``word32`` (S + A) >> 32 + ``R_AMDGPU_ABS64`` 3 ``word64`` S + A + ``R_AMDGPU_REL32`` 4 ``word32`` S + A - P + ``R_AMDGPU_REL64`` 5 ``word64`` S + A - P + ``R_AMDGPU_ABS32`` 6 ``word32`` S + A + ``R_AMDGPU_GOTPCREL`` 7 ``word32`` G + GOT + A - P + ``R_AMDGPU_GOTPCREL32_LO`` 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF + ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 + ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF + ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 + ========================== ===== ========== ============================== + +.. _amdgpu-dwarf: + +DWARF +----- + +Standard DWARF [DWARF]_ Version 2 sections can be generated. These contain +information that maps the code object executable code and data to the source +language constructs. It can be used by tools such as debuggers and profilers. + +Address Space Mapping +~~~~~~~~~~~~~~~~~~~~~ + +The following address space mapping is used: + + .. table:: AMDGPU DWARF Address Space Mapping + :name: amdgpu-dwarf-address-space-mapping-table + + =================== ================= + DWARF Address Space Memory Space + =================== ================= + 1 Private (Scratch) + 2 Local (group/LDS) + *omitted* Global + *omitted* Constant + *omitted* Generic (Flat) + *not supported* Region (GDS) + =================== ================= + +See :ref:`amdgpu-address-spaces` for infomration on the memory space terminology +used in the table. + +An ``address_class`` attribute is generated on pointer type DIEs to specify the +DWARF address space of the value of the pointer when it is in the *private* or +*local* address space. Otherwise the attribute is omitted. + +An ``XDEREF`` operation is generated in location list expressions for variables +that are allocated in the *private* and *local* address space. Otherwise no +``XDREF`` is omitted. + +Register Mapping +~~~~~~~~~~~~~~~~ + +*This section is WIP.* + +.. TODO + Define DWARF register enumeration. + + If want to present a wavefront state then should expose vector registers as + 64 wide (rather than per work-item view that LLVM uses). Either as seperate + registers, or a 64x4 byte single register. In either case use a new LANE op + (akin to XDREF) to select the current lane usage in a location + expression. This would also allow scalar register spilling to vector register + lanes to be expressed (currently no debug information is being generated for + spilling). If choose a wide single register approach then use LANE in + conjunction with PIECE operation to select the dword part of the register for + the current lane. If the separate register approach then use LANE to select + the register. + +Source Text +~~~~~~~~~~~ + +*This section is WIP.* + +.. TODO + DWARF extension to include runtime generated source text. + +.. _amdgpu-code-conventions: + +Code Conventions +================ + +AMDHSA +------ + +This section provides code conventions used when the target triple OS is +``amdhsa`` (see :ref:`amdgpu-target-triples`). + +Kernel Dispatch +~~~~~~~~~~~~~~~ + +The HSA architected queuing language (AQL) defines a user space memory interface +that can be used to control the dispatch of kernels, in an agent independent +way. An agent can have zero or more AQL queues created for it using the ROCm +runtime, in which AQL packets (all of which are 64 bytes) can be placed. See the +*HSA Platform System Architecture Specification* [HSA]_ for the AQL queue +mechanics and packet layouts. + +The packet processor of a kernel agent is responsible for detecting and +dispatching HSA kernels from the AQL queues associated with it. For AMD GPUs the +packet processor is implemented by the hardware command processor (CP), +asynchronous dispatch controller (ADC) and shader processor input controller +(SPI). + +The ROCm runtime can be used to allocate an AQL queue object. It uses the kernel +mode driver to initialize and register the AQL queue with CP. + +To dispatch a kernel the following actions are performed. This can occur in the +CPU host program, or from an HSA kernel executing on a GPU. + +1. A pointer to an AQL queue for the kernel agent on which the kernel is to be + executed is obtained. +2. A pointer to the kernel descriptor (see + :ref:`amdgpu-amdhsa-kernel-descriptor`) of the kernel to execute is + obtained. It must be for a kernel that is contained in a code object that that + was loaded by the ROCm runtime on the kernel agent with which the AQL queue is + associated. +3. Space is allocated for the kernel arguments using the ROCm runtime allocator + for a memory region with the kernarg property for the kernel agent that will + execute the kernel. It must be at least 16 byte aligned. +4. Kernel argument values are assigned to the kernel argument memory + allocation. The layout is defined in the *HSA Programmer’s Language Reference* + [HSA]_. For AMDGPU the kernel execution directly accesses the kernel argument + memory in the same way constant memory is accessed. (Note that the HSA + specification allows an implementation to copy the kernel argument contents to + another location that is accessed by the kernel.) +5. An AQL kernel dispatch packet is created on the AQL queue. The ROCm runtime + api uses 64 bit atomic operations to reserve space in the AQL queue for the + packet. The packet must be set up, and the final write must use an atomic + store release to set the packet kind to ensure the packet contents are + visible to the kernel agent. AQL defines a doorbell signal mechanism to + notify the kernel agent that the AQL queue has been updated. These rules, and + the layout of the AQL queue and kernel dispatch packet is defined in the *HSA + System Architecture Specification* [HSA]_. +6. A kernel dispatch packet includes information about the actual dispatch, + such as grid and work-group size, together with information from the code + object about the kernel, such as segment sizes. The ROCm runtime queries on + the kernel symbol can be used to obtain the code object values which are + recorded in the :ref:`amdgpu-code-object-metadata`. +7. CP executes micro-code and is responsible for detecting and setting up the + GPU to execute the wavefronts of a kernel dispatch. +8. CP ensures that when the a wavefront starts executing the kernel machine + code, the scalar general purpose registers (SGPR) and vector general purpose + registers (VGPR) are set up as required by the machine code. The required + setup is defined in the :ref:`amdgpu-amdhsa-kernel-descriptor`. The initial + register state is defined in + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`. +9. The prolog of the kernel machine code (see + :ref:`amdgpu-amdhsa-kernel-prolog`) sets up the machine state as necessary + before continuing executing the machine code that corresponds to the kernel. +10. When the kernel dispatch has completed execution, CP signals the completion + signal specified in the kernel dispatch packet if not 0. + +.. _amdgpu-amdhsa-memory-spaces: + +Memory Spaces +~~~~~~~~~~~~~ + +The memory space properties are: + + .. table:: AMDHSA Memory Spaces + :name: amdgpu-amdhsa-memory-spaces-table + + ================= =========== ======== ======= ================== + Memory Space Name HSA Segment Hardware Address NULL Value + Name Name Size + ================= =========== ======== ======= ================== + Private private scratch 32 0x00000000 + Local group LDS 32 0xFFFFFFFF + Global global global 64 0x0000000000000000 + Constant constant *same as 64 0x0000000000000000 + global* + Generic flat flat 64 0x0000000000000000 + Region N/A GDS 32 *not implemented + for AMDHSA* + ================= =========== ======== ======= ================== + +The global and constant memory spaces both use global virtual addresses, which +are the same virtual address space used by the CPU. However, some virtual +addresses may only be accessible to the CPU, some only accessible by the GPU, +and some by both. + +Using the constant memory space indicates that the data will not change during +the execution of the kernel. This allows scalar read instructions to be +used. The vector and scalar L1 caches are invalidated of volatile data before +each kernel dispatch execution to allow constant memory to change values between +kernel dispatches. + +The local memory space uses the hardware Local Data Store (LDS) which is +automatically allocated when the hardware creates work-groups of wavefronts, and +freed when all the wavefronts of a work-group have terminated. The data store +(DS) instructions can be used to access it. + +The private memory space uses the hardware scratch memory support. If the kernel +uses scratch, then the hardware allocates memory that is accessed using +wavefront lane dword (4 byte) interleaving. The mapping used from private +address to physical address is: + + ``wavefront-scratch-base + + (private-address * wavefront-size * 4) + + (wavefront-lane-id * 4)`` + +There are different ways that the wavefront scratch base address is determined +by a wavefront (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). This +memory can be accessed in an interleaved manner using buffer instruction with +the scratch buffer descriptor and per wave scratch offset, by the scratch +instructions, or by flat instructions. If each lane of a wavefront accesses the +same private address, the interleaving results in adjacent dwords being accessed +and hence requires fewer cache lines to be fetched. Multi-dword access is not +supported except by flat and scratch instructions in GFX9. + +The generic address space uses the hardware flat address support available in +GFX7-GFX9. This uses two fixed ranges of virtual addresses (the private and +local appertures), that are outside the range of addressible global memory, to +map from a flat address to a private or local address. + +FLAT instructions can take a flat address and access global, private (scratch) +and group (LDS) memory depending in if the address is within one of the +apperture ranges. Flat access to scratch requires hardware aperture setup and +setup in the kernel prologue (see :ref:`amdgpu-amdhsa-flat-scratch`). Flat +access to LDS requires hardware aperture setup and M0 (GFX7-GFX8) register setup +(see :ref:`amdgpu-amdhsa-m0`). + +To convert between a segment address and a flat address the base address of the +appertures address can be used. For GFX7-GFX8 these are available in the +:ref:`amdgpu-amdhsa-hsa-aql-queue` the address of which can be obtained with +Queue Ptr SGPR (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). For +GFX9 the appature base addresses are directly available as inline constant +registers ``SRC_SHARED_BASE/LIMIT`` and ``SRC_PRIVATE_BASE/LIMIT``. In 64 bit +address mode the apperture sizes are 2^32 bytes and the base is aligned to 2^32 +which makes it easier to convert from flat to segment or segment to flat. + +HSA Image and Samplers +~~~~~~~~~~~~~~~~~~~~~~ + +Image and sample handles created by the ROCm runtime are 64 bit addresses of a +hardware 32 byte V# and 48 byte S# object respectively. In order to support the +HSA ``query_sampler`` operations two extra dwords are used to store the HSA BRIG +enumeration values for the queries that are not trivially deducible from the S# +representation. + +HSA Signals +~~~~~~~~~~~ + +Signal handles created by the ROCm runtime are 64 bit addresses of a structure +allocated in memory accessible from both the CPU and GPU. The structure is +defined by the ROCm runtime and subject to change between releases (see +[AMD-ROCm-github]_). + +.. _amdgpu-amdhsa-hsa-aql-queue: + +HSA AQL Queue +~~~~~~~~~~~~~ + +The AQL queue structure is defined by the ROCm runtime and subject to change +between releases (see [AMD-ROCm-github]_). For some processors it contains +fields needed to implement certain language features such as the flat address +aperture bases. It also contains fields used by CP such as managing the +allocation of scratch memory. + +.. _amdgpu-amdhsa-kernel-descriptor: + +Kernel Descriptor +~~~~~~~~~~~~~~~~~ + +A kernel descriptor consists of the information needed by CP to initiate the +execution of a kernel, including the entry point address of the machine code +that implements the kernel. + +Kernel Descriptor for GFX6-GFX9 ++++++++++++++++++++++++++++++++ + +CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. + + .. table:: Kernel Descriptor for GFX6-GFX9 + :name: amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table + + ======= ======= =============================== =========================== + Bits Size Field Name Description + ======= ======= =============================== =========================== + 31:0 4 bytes group_segment_fixed_size The amount of fixed local + address space memory + required for a work-group + in bytes. This does not + include any dynamically + allocated local address + space memory that may be + added when the kernel is + dispatched. + 63:32 4 bytes private_segment_fixed_size The amount of fixed + private address space + memory required for a + work-item in bytes. If + is_dynamic_callstack is 1 + then additional space must + be added to this value for + the call stack. + 95:64 4 bytes max_flat_workgroup_size Maximum flat work-group + size supported by the + kernel in work-items. + 96 1 bit is_dynamic_call_stack Indicates if the generated + machine code is using a + dynamically sized call + stack. + 97 1 bit is_xnack_enabled Indicates if the generated + machine code is capable of + suppoting XNACK. + 127:98 30 bits Reserved. Must be 0. + 191:128 8 bytes kernel_code_entry_byte_offset Byte offset (possibly + negative) from base + address of kernel + descriptor to kernel's + entry point instruction + which must be 256 byte + aligned. + 383:192 24 Reserved. Must be 0. + bytes + 415:384 4 bytes compute_pgm_rsrc1 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC1`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table`. + 447:416 4 bytes compute_pgm_rsrc2 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC2`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table`. + 448 1 bit enable_sgpr_private_segment Enable the setup of the + _buffer SGPR user data registers + (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + The total number of SGPR + user data registers + requested must not exceed + 16 and match value in + ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``. + Any requests beyond 16 + will be ignored. + 449 1 bit enable_sgpr_dispatch_ptr *see above* + 450 1 bit enable_sgpr_queue_ptr *see above* + 451 1 bit enable_sgpr_kernarg_segment_ptr *see above* + 452 1 bit enable_sgpr_dispatch_id *see above* + 453 1 bit enable_sgpr_flat_scratch_init *see above* + 454 1 bit enable_sgpr_private_segment *see above* + _size + 455 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_X should always be 0. + 456 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_Y should always be 0. + 457 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_Z should always be 0. + 463:458 6 bits Reserved. Must be 0. + 511:464 4 Reserved. Must be 0. + bytes + 512 **Total size 64 bytes.** + ======= =================================================================== + +.. + + .. table:: compute_pgm_rsrc1 for GFX6-GFX9 + :name: amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 5:0 6 bits granulated_workitem_vgpr_count Number of vector registers + used by each work-item, + granularity is device + specific: + + GFX6-9 + roundup((max-vgpg + 1) + / 4) - 1 + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.VGPRS``. + 9:6 4 bits granulated_wavefront_sgpr_count Number of scalar registers + used by a wavefront, + granularity is device + specific: + + GFX6-8 + roundup((max-sgpg + 1) + / 8) - 1 + GFX9 + roundup((max-sgpg + 1) + / 16) - 1 + + Includes the special SGPRs + for VCC, Flat Scratch (for + GFX7 onwards) and XNACK + (for GFX8 onwards). It does + not include the 16 SGPR + added if a trap handler is + enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.SGPRS``. + 11:10 2 bits priority Must be 0. + + Start executing wavefront + at the specified priority. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIORITY``. + 13:12 2 bits float_mode_round_32 Wavefront starts execution + with specified rounding + mode for single (32 + bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 15:14 2 bits float_mode_round_16_64 Wavefront starts execution + with specified rounding + denorm mode for half/double (16 + and 64 bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 17:16 2 bits float_mode_denorm_32 Wavefront starts execution + with specified denorm mode + for single (32 + bit) floating point + precision floating point + operations. + + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 19:18 2 bits float_mode_denorm_16_64 Wavefront starts execution + with specified denorm mode + for half/double (16 + and 64 bit) floating point + precision floating point + operations. + + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 20 1 bit priv Must be 0. + + Start executing wavefront + in privilege trap handler + mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIV``. + 21 1 bit enable_dx10_clamp Wavefront starts execution + with DX10 clamp mode + enabled. Used by the vector + ALU to force DX-10 style + treatment of NaN's (when + set, clamp NaN to zero, + otherwise pass NaN + through). + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.DX10_CLAMP``. + 22 1 bit debug_mode Must be 0. + + Start executing wavefront + in single step mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.DEBUG_MODE``. + 23 1 bit enable_ieee_mode Wavefront starts execution + with IEEE mode + enabled. Floating point + opcodes that support + exception flag gathering + will quiet and propagate + signaling-NaN inputs per + IEEE 754-2008. Min_dx10 and + max_dx10 become IEEE + 754-2008 compliant due to + signaling-NaN propagation + and quieting. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.IEEE_MODE``. + 24 1 bit bulky Must be 0. + + Only one work-group allowed + to execute on a compute + unit. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.BULKY``. + 25 1 bit cdbg_user Must be 0. + + Flag that can be used to + control debugging code. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.CDBG_USER``. + 31:26 6 bits Reserved. Must be 0. + 32 **Total size 4 bytes** + ======= =================================================================================================================== + +.. + + .. table:: compute_pgm_rsrc2 for GFX6-GFX9 + :name: amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 0 1 bit enable_sgpr_private_segment Enable the setup of the + _wave_offset SGPR wave scratch offset + system register (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.SCRATCH_EN``. + 5:1 5 bits user_sgpr_count The total number of SGPR + user data registers + requested. This number must + match the number of user + data registers enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.USER_SGPR``. + 6 1 bit enable_trap_handler Set to 1 if code contains a + TRAP instruction which + requires a trap hander to + be enabled. + + CP sets + ``COMPUTE_PGM_RSRC2.TRAP_PRESENT`` + if the runtime has + installed a trap handler + regardless of the setting + of this field. + 7 1 bit enable_sgpr_workgroup_id_x Enable the setup of the + system SGPR register for + the work-group id in the X + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_X_EN``. + 8 1 bit enable_sgpr_workgroup_id_y Enable the setup of the + system SGPR register for + the work-group id in the Y + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Y_EN``. + 9 1 bit enable_sgpr_workgroup_id_z Enable the setup of the + system SGPR register for + the work-group id in the Z + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Z_EN``. + 10 1 bit enable_sgpr_workgroup_info Enable the setup of the + system SGPR register for + work-group information (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``. + 12:11 2 bits enable_vgpr_workitem_id Enable the setup of the + VGPR system registers used + for the work-item ID. + :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table` + defines the values. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``. + 13 1 bit enable_exception_address_watch Must be 0. + + Wavefront starts execution + with address watch + exceptions enabled which + are generated when L1 has + witnessed a thread access + an *address of + interest*. + + CP is responsible for + filling in the address + watch bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 14 1 bit enable_exception_memory Must be 0. + + Wavefront starts execution + with memory violation + exceptions exceptions + enabled which are generated + when a memory violation has + occurred for this wave from + L1 or LDS + (write-to-read-only-memory, + mis-aligned atomic, LDS + address out of range, + illegal address, etc.). + + CP sets the memory + violation bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 23:15 9 bits granulated_lds_size Must be 0. + + CP uses the rounded value + from the dispatch packet, + not this value, as the + dispatch may contain + dynamically allocated group + segment memory. CP writes + directly to + ``COMPUTE_PGM_RSRC2.LDS_SIZE``. + + Amount of group segment + (LDS) to allocate for each + work-group. Granularity is + device specific: + + GFX6: + roundup(lds-size / (64 * 4)) + GFX7-GFX9: + roundup(lds-size / (128 * 4)) + + 24 1 bit enable_exception_ieee_754_fp Wavefront starts execution + _invalid_operation with specified exceptions + enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.EXCP_EN`` + (set from bits 0..6). + + IEEE 754 FP Invalid + Operation + 25 1 bit enable_exception_fp_denormal FP Denormal one or more + _source input operands is a + denormal number + 26 1 bit enable_exception_ieee_754_fp IEEE 754 FP Division by + _division_by_zero Zero + 27 1 bit enable_exception_ieee_754_fp IEEE 754 FP FP Overflow + _overflow + 28 1 bit enable_exception_ieee_754_fp IEEE 754 FP Underflow + _underflow + 29 1 bit enable_exception_ieee_754_fp IEEE 754 FP Inexact + _inexact + 30 1 bit enable_exception_int_divide_by Integer Division by Zero + _zero (rcp_iflag_f32 instruction + only) + 31 1 bit Reserved. Must be 0. + 32 **Total size 4 bytes.** + ======= =================================================================================================================== + +.. + + .. table:: Floating Point Rounding Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even + AMD_FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity + AMD_FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity + AMD_FLOAT_ROUND_MODE_ZERO 3 Round Toward 0 + ===================================== ===== =============================== + +.. + + .. table:: Floating Point Denorm Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination + Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush + ===================================== ===== =============================== + +.. + + .. table:: System VGPR Work-Item ID Enumeration Values + :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y + dimensions ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z + dimensions ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined. + ===================================== ===== =============================== + +.. _amdgpu-amdhsa-initial-kernel-execution-state: + +Initial Kernel Execution State +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section defines the register state that will be set up by the packet +processor prior to the start of execution of every wavefront. This is limited by +the constraints of the hardware controllers of CP/ADC/SPI. + +The order of the SGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_sgpr_*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used +for enabled registers are dense starting at SGPR0: the first enabled register is +SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have +an SGPR number. + +The initial SGPRs comprise up to 16 User SRGPs that are set by CP and apply to +all waves of the grid. It is possible to specify more than 16 User SGPRs using +the ``enable_sgpr_*`` bit fields, in which case only the first 16 are actually +initialized. These are then immediately followed by the System SGPRs that are +set up by ADC/SPI and can have different values for each wave of the grid +dispatch. + +SGPR register initial state is defined in +:ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + + .. table:: SGPR Register Set Up Order + :name: amdgpu-amdhsa-sgpr-register-set-up-order-table + + ========== ========================== ====== ============================== + SGPR Order Name Number Description + (kernel descriptor enable of + field) SGPRs + ========== ========================== ====== ============================== + First Private Segment Buffer 4 V# that can be used, together + (enable_sgpr_private with Scratch Wave Offset as an + _segment_buffer) offset, to access the private + memory space using a segment + address. + + CP uses the value provided by + the runtime. + then Dispatch Ptr 2 64 bit address of AQL dispatch + (enable_sgpr_dispatch_ptr) packet for kernel dispatch + actually executing. + then Queue Ptr 2 64 bit address of amd_queue_t + (enable_sgpr_queue_ptr) object for AQL queue on which + the dispatch packet was + queued. + then Kernarg Segment Ptr 2 64 bit address of Kernarg + (enable_sgpr_kernarg segment. This is directly + _segment_ptr) copied from the + kernarg_address in the kernel + dispatch packet. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + then Dispatch Id 2 64 bit Dispatch ID of the + (enable_sgpr_dispatch_id) dispatch packet being + executed. + then Flat Scratch Init 2 This is 2 SGPRs: + (enable_sgpr_flat_scratch + _init) GFX6 + Not supported. + GFX7-GFX8 + The first SGPR is a 32 bit + byte offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID`` + to per SPI base of memory + for scratch for the queue + executing the kernel + dispatch. CP obtains this + from the runtime. + + This is the same offset used + in computing the Scratch + Segment Buffer base + address. The value of + Scratch Wave Offset must be + added by the kernel machine + code and moved to SGPRn-4 + for use as the FLAT SCRATCH + BASE in flat memory + instructions. + + The second SGPR is 32 bit + byte size of a single + work-item’s scratch memory + usage. This is directly + loaded from the kernel + dispatch packet Private + Segment Byte Size and + rounded up to a multiple of + DWORD. + + The kernel code must move to + SGPRn-3 for use as the FLAT + SCRATCH SIZE in flat memory + instructions. Having CP load + it once avoids loading it at + the beginning of every + wavefront. + GFX9 + This is the 64 bit base + address of the per SPI + scratch backing memory + managed by SPI for the queue + executing the kernel + dispatch. CP obtains this + from the runtime (and + divides it if there are + multiple Shader Arrays each + with its own SPI). The value + of Scratch Wave Offset must + be added by the kernel + machine code and moved to + SGPRn-4 and SGPRn-3 for use + as the FLAT SCRATCH BASE in + flat memory instructions. + then Private Segment Size 1 The 32 bit byte size of a + (enable_sgpr_private single work-item’s scratch + _segment_size) memory allocation. This is the + value from the kernel dispatch + packet Private Segment Byte + Size rounded up by CP to a + multiple of DWORD. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + + This is not used for + GFX7-GFX8 since it is the same + value as the second SGPR of + Flat Scratch Init. However, it + may be needed for GFX9 which + changes the meaning of the + Flat Scratch Init value. + then Grid Work-Group Count X 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the X dimension + _workgroup_count_X) for the grid being + executed. Computed from the + fields in the kernel dispatch + packet as ((grid_size.x + + workgroup_size.x - 1) / + workgroup_size.x). + then Grid Work-Group Count Y 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the Y dimension + _workgroup_count_Y && for the grid being + less than 16 previous executed. Computed from the + SGPRs) fields in the kernel dispatch + packet as ((grid_size.y + + workgroup_size.y - 1) / + workgroupSize.y). + + Only initialized if <16 + previous SGPRs initialized. + then Grid Work-Group Count Z 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the Z dimension + _workgroup_count_Z && for the grid being + less than 16 previous executed. Computed from the + SGPRs) fields in the kernel dispatch + packet as ((grid_size.z + + workgroup_size.z - 1) / + workgroupSize.z). + + Only initialized if <16 + previous SGPRs initialized. + then Work-Group Id X 1 32 bit work-group id in X + (enable_sgpr_workgroup_id dimension of grid for + _X) wavefront. + then Work-Group Id Y 1 32 bit work-group id in Y + (enable_sgpr_workgroup_id dimension of grid for + _Y) wavefront. + then Work-Group Id Z 1 32 bit work-group id in Z + (enable_sgpr_workgroup_id dimension of grid for + _Z) wavefront. + then Work-Group Info 1 {first_wave, 14’b0000, + (enable_sgpr_workgroup ordered_append_term[10:0], + _info) threadgroup_size_in_waves[5:0]} + then Scratch Wave Offset 1 32 bit byte offset from base + (enable_sgpr_private of scratch base of queue + _segment_wave_offset) executing the kernel + dispatch. Must be used as an + offset with Private + segment address when using + Scratch Segment Buffer. It + must be used to set up FLAT + SCRATCH for flat addressing + (see + :ref:`amdgpu-amdhsa-flat-scratch`). + ========== ========================== ====== ============================== + +The order of the VGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_vgpr*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used +for enabled registers are dense starting at VGPR0: the first enabled register is +VGPR0, the next enabled register is VGPR1 etc.; disabled registers do not have a +VGPR number. + +VGPR register initial state is defined in +:ref:`amdgpu-amdhsa-vgpr-register-set-up-order-table`. + + .. table:: VGPR Register Set Up Order + :name: amdgpu-amdhsa-vgpr-register-set-up-order-table + + ========== ========================== ====== ============================== + VGPR Order Name Number Description + (kernel descriptor enable of + field) VGPRs + ========== ========================== ====== ============================== + First Work-Item Id X 1 32 bit work item id in X + (Always initialized) dimension of work-group for + wavefront lane. + then Work-Item Id Y 1 32 bit work item id in Y + (enable_vgpr_workitem_id dimension of work-group for + > 0) wavefront lane. + then Work-Item Id Z 1 32 bit work item id in Z + (enable_vgpr_workitem_id dimension of work-group for + > 1) wavefront lane. + ========== ========================== ====== ============================== + +The setting of registers is is done by GPU CP/ADC/SPI hardware as follows: + +1. SGPRs before the Work-Group Ids are set by CP using the 16 User Data + registers. +2. Work-group Id registers X, Y, Z are set by ADC which supports any + combination including none. +3. Scratch Wave Offset is set by SPI in a per wave basis which is why its value + cannot included with the flat scratch init value which is per queue. +4. The VGPRs are set by SPI which only supports specifying either (X), (X, Y) + or (X, Y, Z). + +Flat Scratch register pair are adjacent SGRRs so they can be moved as a 64 bit +value to the hardware required SGPRn-3 and SGPRn-4 respectively. + +The global segment can be accessed either using buffer instructions (GFX6 which +has V# 64 bit address support), flat instructions (GFX7-9), or global +instructions (GFX9). + +If buffer operations are used then the compiler can generate a V# with the +following properties: + +* base address of 0 +* no swizzle +* ATC: 1 if IOMMU present (such as APU) +* ptr64: 1 +* MTYPE set to support memory coherence that matches the runtime (such as CC for + APU and NC for dGPU). + +.. _amdgpu-amdhsa-kernel-prolog: + +Kernel Prolog +~~~~~~~~~~~~~ + +.. _amdgpu-amdhsa-m0: + +M0 +++ + +GFX6-GFX8 + The M0 register must be initialized with a value at least the total LDS size + if the kernel may access LDS via DS or flat operations. Total LDS size is + available in dispatch packet. For M0, it is also possible to use maximum + possible value of LDS for given target (0x7FFF for GFX6 and 0xFFFF for + GFX7-GFX8). +GFX9 + The M0 register is not used for range checking LDS accesses and so does not + need to be initialized in the prolog. + +.. _amdgpu-amdhsa-flat-scratch: + +Flat Scratch +++++++++++++ + +If the kernel may use flat operations to access scratch memory, the prolog code +must set up FLAT_SCRATCH register pair (FLAT_SCRATCH_LO/FLAT_SCRATCH_HI which +are in SGPRn-4/SGPRn-3). Initialization uses Flat Scratch Init and Scratch Wave +Offset SGPR registers (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`): + +GFX6 + Flat scratch is not supported. + +GFX7-8 + 1. The low word of Flat Scratch Init is 32 bit byte offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID`` to the base of scratch backing memory + being managed by SPI for the queue executing the kernel dispatch. This is + the same value used in the Scratch Segment Buffer V# base address. The + prolog must add the value of Scratch Wave Offset to get the wave's byte + scratch backing memory offset from ``SH_HIDDEN_PRIVATE_BASE_VIMID``. Since + FLAT_SCRATCH_LO is in units of 256 bytes, the offset must be right shifted + by 8 before moving into FLAT_SCRATCH_LO. + 2. The second word of Flat Scratch Init is 32 bit byte size of a single + work-items scratch memory usage. This is directly loaded from the kernel + dispatch packet Private Segment Byte Size and rounded up to a multiple of + DWORD. Having CP load it once avoids loading it at the beginning of every + wavefront. The prolog must move it to FLAT_SCRATCH_LO for use as FLAT SCRATCH + SIZE. +GFX9 + The Flat Scratch Init is the 64 bit address of the base of scratch backing + memory being managed by SPI for the queue executing the kernel dispatch. The + prolog must add the value of Scratch Wave Offset and moved to the FLAT_SCRATCH + pair for use as the flat scratch base in flat memory instructions. + +.. _amdgpu-amdhsa-memory-model: + +Memory Model +~~~~~~~~~~~~ + +This section describes the mapping of LLVM memory model onto AMDGPU machine code +(see :ref:`memmodel`). *The implementation is WIP.* + +.. TODO + Update when implementation complete. + + Support more relaxed OpenCL memory model to be controled by environment + component of target triple. + +The AMDGPU backend supports the memory synchronization scopes specified in +:ref:`amdgpu-memory-scopes`. + +The code sequences used to implement the memory model are defined in table +:ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. + +The sequences specify the order of instructions that a single thread must +execute. The ``s_waitcnt`` and ``buffer_wbinvl1_vol`` are defined with respect +to other memory instructions executed by the same thread. This allows them to be +moved earlier or later which can allow them to be combined with other instances +of the same instruction, or hoisted/sunk out of loops to improve +performance. Only the instructions related to the memory model are given; +additional ``s_waitcnt`` instructions are required to ensure registers are +defined before being used. These may be able to be combined with the memory +model ``s_waitcnt`` instructions as described above. + +The AMDGPU memory model supports both the HSA [HSA]_ memory model, and the +OpenCL [OpenCL]_ memory model. The HSA memory model uses a single happens-before +relation for all address spaces (see :ref:`amdgpu-address-spaces`). The OpenCL +memory model which has separate happens-before relations for the global and +local address spaces, and only a fence specifying both global and local address +space joins the relationships. Since the LLVM ``memfence`` instruction does not +allow an address space to be specified the OpenCL fence has to convervatively +assume both local and global address space was specified. However, optimizations +can often be done to eliminate the additional ``s_waitcnt``instructions when +there are no intervening corresponding ``ds/flat_load/store/atomic`` memory +instructions. The code sequences in the table indicate what can be omitted for +the OpenCL memory. The target triple environment is used to determine if the +source language is OpenCL (see :ref:`amdgpu-opencl`). + +``ds/flat_load/store/atomic`` instructions to local memory are termed LDS +operations. + +``buffer/global/flat_load/store/atomic`` instructions to global memory are +termed vector memory operations. + +For GFX6-GFX9: + +* Each agent has multiple compute units (CU). +* Each CU has multiple SIMDs that execute wavefronts. +* The wavefronts for a single work-group are executed in the same CU but may be + executed by different SIMDs. +* Each CU has a single LDS memory shared by the wavefronts of the work-groups + executing on it. +* All LDS operations of a CU are performed as wavefront wide operations in a + global order and involve no caching. Completion is reported to a wavefront in + execution order. +* The LDS memory has multiple request queues shared by the SIMDs of a + CU. Therefore, the LDS operations performed by different waves of a work-group + can be reordered relative to each other, which can result in reordering the + visibility of vector memory operations with respect to LDS operations of other + wavefronts in the same work-group. A ``s_waitcnt lgkmcnt(0)`` is required to + ensure synchonization between LDS operations and vector memory operations + between waves of a work-group, but not between operations performed by the + same wavefront. +* The vector memory operations are performed as wavefront wide operations and + completion is reported to a wavefront in execution order. The exception is + that for GFX7-9 ``flat_load/store/atomic`` instructions can report out of + vector memory order if they access LDS memory, and out of LDS operation order + if they access global memory. +* The vector memory operations access a vector L1 cache shared by all wavefronts + on a CU. Therefore, no special action is required for coherence between + wavefronts in the same work-group. A ``buffer_wbinvl1_vol`` is required for + coherence between waves executing in different work-groups as they may be + executing on different CUs. +* The scalar memory operations access a scalar L1 cache shared by all wavefronts + on a group of CUs. The scalar and vector L1 caches are not coherent. However, + scalar operations are used in a restricted way so do not impact the memory + model. See :ref:`amdgpu-amdhsa-memory-spaces`. +* The vector and scalar memory operations use an L2 cache shared by all CUs on + the same agent. +* The L2 cache has independent channels to service disjoint ranges of virtual + addresses. +* Each CU has a separate request queue per channel. Therefore, the vector and + scalar memory operations performed by waves executing in different work-groups + (which may be executing on different CUs) of an agent can be reordered + relative to each other. A ``s_waitcnt vmcnt(0)`` is required to ensure + synchonization between vector memory operations of different CUs. It ensures a + previous vector memory operation has completed before executing a subsequent + vector memory or LDS operation and so can be used to meet the requirements of + acquire and release. +* The L2 cache can be kept coherent with other agents on some targets, or ranges + of virtual addresses can be set up to bypass it to ensure system coherence. + +Private address space uses ``buffer_load/store`` using the scratch V# (GFX6-8), +or ``scratch_load/store`` (GFX9). Since only a single thread is accessing the +memory, atomic memory orderings are not meaningful and all accesses are treated +as non-atomic. + +Constant address space uses ``buffer/global_load`` instructions (or equivalent +scalar memory instructions). Since the constant address space contents do not +change during the execution of a kernel dispatch it is not legal to perform +stores, and atomic memory orderings are not meaningful and all access are +treated as non-atomic. + +A memory synchronization scope wider than work-group is not meaningful for the +group (LDS) address space and is treated as work-group. + +The memory model does not support the region address space which is treated as +non-atomic. + +Acquire memory ordering is not meaningful on store atomic instructions and is +treated as non-atomic. + +Release memory ordering is not meaningful on load atomic instructions and is +treated a non-atomic. + +Acquire-release memory ordering is not meaningful on load or store atomic +instructions and is treated as acquire and release respectively. + +AMDGPU backend only uses scalar memory operations to access memory that is +proven to not change during the execution of the kernel dispatch. This includes +constant address space and global address space for program scope const +variables. Therefore the kernel machine code does not have to maintain the +scalar L1 cache to ensure it is coherent with the vector L1 cache. The scalar +and vector L1 caches are invalidated between kernel dispatches by CP since +constant address space data may change between kernel dispatch executions. See +:ref:`amdgpu-amdhsa-memory-spaces`. + +The one exeception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a ``s_dcache_wb`` is inserted before the ``s_endpgm`` and before a function +return since the locations may be used for vector memory instructions by a +future wave that uses the same scratch area, or a function call that creates a +frame at the same address, respectively. There is no need for a ``s_dcache_inv`` +as all scalar writes are write-before-read in the same thread. + +Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC_NV (non-coherenent non-volatile). Since the private address space +is only accessed by a single thread, and is always write-before-read, +there is never a need to invalidate these entries from the L1 cache. Hence all +cache invalidates are done as ``*_vol`` to only invalidate the volatile cache +lines. + +On dGPU the kernarg backing memory is accessed as UC (uncached) to avoid needing +to invalidate the L2 cache. This also causes it to be treated as non-volatile +and so is not invalidated by ``*_vol``. On APU it is accessed as CC (cache +coherent) and so the L2 cache will coherent with the CPU and other agents. + + .. table:: AMDHSA Memory Model Code Sequences GFX6-GFX9 + :name: amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table + + ============ ============ ============== ========== ======================= + LLVM Instr LLVM Memory LLVM Memory AMDGPU AMDGPU Machine Code + Ordering Sync Scope Address + Space + ============ ============ ============== ========== ======================= + **Non-Atomic** + --------------------------------------------------------------------------- + load *none* *none* - global non-volatile + - generic 1. buffer/global/flat_load + volatile + 1. buffer/global/flat_load + glc=1 + load *none* *none* - local 1. ds_load + store *none* *none* - global 1. buffer/global/flat_store + - generic + store *none* *none* - local 1. ds_store + **Unordered Atomic** + --------------------------------------------------------------------------- + load atomic unordered *any* *any* *Same as non-atomic*. + store atomic unordered *any* *any* *Same as non-atomic*. + atomicrmw unordered *any* *any* *Same as monotonic + atomic*. + **Monotonic Atomic** + --------------------------------------------------------------------------- + load atomic monotonic - singlethread - global 1. buffer/global/flat_load + - wavefront - generic + - workgroup + load atomic monotonic - singlethread - local 1. ds_load + - wavefront + - workgroup + load atomic monotonic - agent - global 1. buffer/global/flat_load + - system - generic glc=1 + store atomic monotonic - singlethread - global 1. buffer/global/flat_store + - wavefront - generic + - workgroup + - agent + - system + store atomic monotonic - singlethread - local 1. ds_store + - wavefront + - workgroup + atomicrmw monotonic - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + - workgroup + - agent + - system + atomicrmw monotonic - singlethread - local 1. ds_atomic + - wavefront + - workgroup + **Acquire Atomic** + --------------------------------------------------------------------------- + load atomic acquire - singlethread - global 1. buffer/global/ds/flat_load + - wavefront - local + - generic + load atomic acquire - workgroup - global 1. buffer/global_load + load atomic acquire - workgroup - local 1. ds/flat_load + - generic 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + + load atomic acquire - agent - global 1. buffer/global_load + - system glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale global data. + + load atomic acquire - agent - generic 1. flat_load glc=1 + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the flat_load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acquire - workgroup - global 1. buffer/global_atomic + atomicrmw acquire - workgroup - local 1. ds/flat_atomic + - generic 2. waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + atomicrmw value + being acquired. + + atomicrmw acquire - agent - global 1. buffer/global_atomic + - system 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - agent - generic 1. flat_atomic + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acquire - singlethread *none* *none* + - wavefront + fence acquire - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + value read by the + fence-paired-atomic. + + fence acquire - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + group/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + fence-paired atomic + has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + **Release Atomic** + --------------------------------------------------------------------------- + store atomic release - singlethread - global 1. buffer/global/ds/flat_store + - wavefront - local + - generic + store atomic release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to local have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + store atomic release - workgroup - local 1. ds_store + store atomic release - agent - global 1. s_waitcnt vmcnt(0) & + - system - generic lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to global have + completed before + performing the + store that is being + released. + + 2. buffer/global/ds/flat_store + atomicrmw release - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global/flat_atomic + atomicrmw release - workgroup - local 1. ds_atomic + atomicrmw release - agent - global 1. s_waitcnt vmcnt(0) & + - system - generic lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and local + have completed + before performing + the atomicrmw that + is being released. + + 2. buffer/global/ds/flat_atomic* + fence release - singlethread *none* *none* + - wavefront + fence release - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + to local have + completed before + performing the + following + fence-paired-atomic. + + fence release - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + to global have + completed before + performing the + following + fence-paired-atomic. + + **Acquire-Release Atomic** + --------------------------------------------------------------------------- + atomicrmw acq_rel - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + atomicrmw acq_rel - workgroup - local 1. ds_atomic + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + + atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + atomicrmw acq_rel - agent - global 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acq_rel - agent - generic 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acq_rel - singlethread *none* *none* + - wavefront + fence acq_rel - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing any + following global + memory operations. + - Ensures that the + preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic) + has completed + before following + global memory + operations. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + local/generic store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + This satisfies the + requirements of + release. + + fence acq_rel - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + preceding + global/local/generic + load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic) + has completed + before invalidating + the cache. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + global/local/generic + store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + This satisfies the + requirements of + release. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. This + satisfies the + requirements of + acquire. + + **Sequential Consistent Atomic** + --------------------------------------------------------------------------- + load atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local load atomic acquire*. + - workgroup - generic + load atomic seq_cst - agent - global 1. s_waitcnt vmcnt(0) + - system - local + - generic - Must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequential + consistent global + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + waitcnt vmcnt(0) of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + competing out of + order.) + + 2. *Following + instructions same as + corresponding load + atomic acquire*. + + store atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local store atomic release*. + - workgroup - generic + store atomic seq_cst - agent - global *Same as corresponding + - system - generic store atomic release*. + atomicrmw seq_cst - singlethread - global *Same as corresponding + - wavefront - local atomicrmw acq_rel*. + - workgroup - generic + atomicrmw seq_cst - agent - global *Same as corresponding + - system - generic atomicrmw acq_rel*. + fence seq_cst - singlethread *none* *Same as corresponding + - wavefront fence acq_rel*. + - workgroup + - agent + - system + ============ ============ ============== ========== ======================= + +The memory order also adds the single thread optimization constrains defined in +table +:ref:`amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table`. + + .. table:: AMDHSA Memory Model Single Thread Optimization Constraints GFX6-GFX9 + :name: amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table + + ============ ============================================================== + LLVM Memory Optimization Constraints + Ordering + ============ ============================================================== + unordered *none* + monotonic *none* + acquire - If a load atomic/atomicrmw then no following load/load + atomic/store/ store atomic/atomicrmw/fence instruction can + be moved before the acquire. + - If a fence then same as load atomic, plus no preceding + associated fence-paired-atomic can be moved after the fence. + release - If a store atomic/atomicrmw then no preceeding load/load + atomic/store/ store atomic/atomicrmw/fence instruction can + be moved after the release. + - If a fence then same as store atomic, plus no following + associated fence-paired-atomic can be moved before the + fence. + acq_rel Same constraints as both acquire and release. + seq_cst - If a load atomic then same constraints as acquire, plus no + preceding sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved after the + seq_cst. + - If a store atomic then the same constraints as release, plus + no following sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved before the + seq_cst. + - If an atomicrmw/fence then same constraints as acq_rel. + ============ ============================================================== Trap Handler ABI ----------------- -The OS element of the target triple controls the trap handler behavior. +~~~~~~~~~~~~~~~~ -HSA OS -^^^^^^ -For code objects generated by AMDGPU back-end for the HSA OS, the runtime -installs a trap handler that supports the s_trap instruction with the following -usage: +For code objects generated by AMDGPU backend for HSA [HSA]_ compatible runtimes +(such as ROCm [AMD-ROCm]_), the runtime installs a trap handler that supports +the ``s_trap`` instruction with the following usage: - +--------------+-------------+-------------------+----------------------------+ - |Usage |Code Sequence|Trap Handler Inputs|Description | - +==============+=============+===================+============================+ - |reserved |s_trap 0x00 | |Reserved by hardware. | - +--------------+-------------+-------------------+----------------------------+ - |HSA debugtrap |s_trap 0x01 |SGPR0-1: queue_ptr |Reserved for HSA debugtrap | - |(arg) | |VGPR0: arg |intrinsic (not implemented).| - +--------------+-------------+-------------------+----------------------------+ - |llvm.trap |s_trap 0x02 |SGPR0-1: queue_ptr |Causes dispatch to be | - | | | |terminated and its | - | | | |associated queue put into | - | | | |the error state. | - +--------------+-------------+-------------------+----------------------------+ - |llvm.debugtrap| s_trap 0x03 |SGPR0-1: queue_ptr |If debugger not installed | - | | | |handled same as llvm.trap. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0x07 | |Reserved for debugger | - |breakpoint | | |breakpoints. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0x08 | |Reserved for debugger. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0xfe | |Reserved for debugger. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0xff | |Reserved for debugger. | - +--------------+-------------+-------------------+----------------------------+ + .. table:: AMDGPU Trap Handler for AMDHSA OS + :name: amdgpu-trap-handler-for-amdhsa-os-table -Non-HSA OS -^^^^^^^^^^ -For code objects generated by AMDGPU back-end for non-HSA OS, the runtime does -not install a trap handler. The llvm.trap and llvm.debugtrap instructions are -handler as follows: + =================== =============== =============== ======================= + Usage Code Sequence Trap Handler Description + Inputs + =================== =============== =============== ======================= + reserved ``s_trap 0x00`` Reserved by hardware. + ``debugtrap(arg)`` ``s_trap 0x01`` ``SGPR0-1``: Reserved for HSA + ``queue_ptr`` ``debugtrap`` + ``VGPR0``: intrinsic (not + ``arg`` implemented). + ``llvm.trap`` ``s_trap 0x02`` ``SGPR0-1``: Causes dispatch to be + ``queue_ptr`` terminated and its + associated queue put + into the error state. + ``llvm.debugtrap`` ``s_trap 0x03`` ``SGPR0-1``: If debugger not + ``queue_ptr`` installed handled + same as ``llvm.trap``. + debugger breakpoint ``s_trap 0x07`` Reserved for debugger + breakpoints. + debugger ``s_trap 0x08`` Reserved for debugger. + debugger ``s_trap 0xfe`` Reserved for debugger. + debugger ``s_trap 0xff`` Reserved for debugger. + =================== =============== =============== ======================= - =============== ============= =============================================== - Usage Code Sequence Description - =============== ============= =============================================== - llvm.trap s_endpgm Causes wavefront to be terminated. - llvm.debugtrap Nothing Compiler warning generated that there is no trap handler installed. - =============== ============= =============================================== +Non-AMDHSA +---------- + +Trap Handler ABI +~~~~~~~~~~~~~~~~ + +For code objects generated by AMDGPU backend for non-amdhsa OS, the runtime does +not install a trap handler. The ``llvm.trap`` and ``llvm.debugtrap`` +instructions are handled as follows: + + .. table:: AMDGPU Trap Handler for Non-AMDHSA OS + :name: amdgpu-trap-handler-for-non-amdhsa-os-table + + =============== =============== =========================================== + Usage Code Sequence Description + =============== =============== =========================================== + llvm.trap s_endpgm Causes wavefront to be terminated. + llvm.debugtrap *none* Compiler warning given that there is no + trap handler installed. + =============== =============== =========================================== + +Source Languages +================ + +.. _amdgpu-opencl: + +OpenCL +------ + +When generating code for the OpenCL language the target triple environment +should be ``opencl`` or ``amdgizcl`` (see :ref:`amdgpu-target-triples`). + +When the language is OpenCL the following differences occur: + +1. The OpenCL memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). +2. The AMDGPU backend adds additional arguments to the kernel. +3. Additional metadata is generated (:ref:`amdgpu-code-object-metadata`). + +.. TODO + Specify what affect this has. Hidden arguments added. Additional metadata + generated. + +.. _amdgpu-hcc: + +HCC +--- + +When generating code for the OpenCL language the target triple environment +should be ``hcc`` (see :ref:`amdgpu-target-triples`). + +When the language is OpenCL the following differences occur: + +1. The HSA memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). + +.. TODO + Specify what affect this has. Assembler -========= +--------- AMDGPU backend has LLVM-MC based assembler which is currently in development. -It supports Southern Islands ISA, Sea Islands and Volcanic Islands. +It supports AMDGCN GFX6-GFX8. -This document describes general syntax for instructions and operands. For more -information about instructions, their semantics and supported combinations -of operands, refer to one of Instruction Set Architecture manuals. +This section describes general syntax for instructions and operands. For more +information about instructions, their semantics and supported combinations of +operands, refer to one of instruction set architecture manuals +[AMD-Souther-Islands]_ [AMD-Sea-Islands]_ [AMD-Volcanic-Islands]_. -An instruction has the following syntax (register operands are -normally comma-separated while extra operands are space-separated): +An instruction has the following syntax (register operands are normally +comma-separated while extra operands are space-separated): * , ... ...* - Operands --------- +~~~~~~~~ The following syntax for register operands is supported: @@ -140,8 +3472,11 @@ The following extra operands are supported: - dst_unused (UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE) - abs, neg, sext -DS Instructions Examples ------------------------- +Instruction Examples +~~~~~~~~~~~~~~~~~~~~ + +DS +~~ .. code-block:: nasm @@ -153,8 +3488,8 @@ DS Instructions Examples For full list of supported instructions, refer to "LDS/GDS instructions" in ISA Manual. -FLAT Instruction Examples --------------------------- +FLAT +++++ .. code-block:: nasm @@ -166,8 +3501,8 @@ FLAT Instruction Examples For full list of supported instructions, refer to "FLAT instructions" in ISA Manual. -MUBUF Instruction Examples ---------------------------- +MUBUF ++++++ .. code-block:: nasm @@ -179,8 +3514,8 @@ MUBUF Instruction Examples For full list of supported instructions, refer to "MUBUF Instructions" in ISA Manual. -SMRD/SMEM Instruction Examples -------------------------------- +SMRD/SMEM ++++++++++ .. code-block:: nasm @@ -192,8 +3527,8 @@ SMRD/SMEM Instruction Examples For full list of supported instructions, refer to "Scalar Memory Operations" in ISA Manual. -SOP1 Instruction Examples --------------------------- +SOP1 +++++ .. code-block:: nasm @@ -207,8 +3542,8 @@ SOP1 Instruction Examples For full list of supported instructions, refer to "SOP1 Instructions" in ISA Manual. -SOP2 Instruction Examples -------------------------- +SOP2 +++++ .. code-block:: nasm @@ -224,8 +3559,8 @@ SOP2 Instruction Examples For full list of supported instructions, refer to "SOP2 Instructions" in ISA Manual. -SOPC Instruction Examples --------------------------- +SOPC +++++ .. code-block:: nasm @@ -236,8 +3571,8 @@ SOPC Instruction Examples For full list of supported instructions, refer to "SOPC Instructions" in ISA Manual. -SOPP Instruction Examples --------------------------- +SOPP +++++ .. code-block:: nasm @@ -259,8 +3594,8 @@ Unless otherwise mentioned, little verification is performed on the operands of SOPP Instructions, so it is up to the programmer to be familiar with the range or acceptable values. -Vector ALU Instruction Examples -------------------------------- +VALU +++++ For vector ALU instruction opcodes (VOP1, VOP2, VOP3, VOPC, VOP_DPP, VOP_SDWA), the assembler will automatically use optimal encoding based on its operands. @@ -314,19 +3649,20 @@ VOP_SDWA examples: For full list of supported instructions, refer to "Vector ALU instructions". HSA Code Object Directives --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ AMDGPU ABI defines auxiliary data in output code object. In assembly source, one can specify them with assembler directives. .hsa_code_object_version major, minor -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++++++++++++++ *major* and *minor* are integers that specify the version of the HSA code object that will be generated by the assembler. .hsa_code_object_isa [major, minor, stepping, vendor, arch] -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *major*, *minor*, and *stepping* are all integers that describe the instruction set architecture (ISA) version of the assembly program. @@ -338,13 +3674,13 @@ By default, the assembler will derive the ISA version, *vendor*, and *arch* from the value of the -mcpu option that is passed to the assembler. .amdgpu_hsa_kernel (name) -^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++ This directives specifies that the symbol with given name is a kernel entry point (label) and the object should contain corresponding symbol of type STT_AMDGPU_HSA_KERNEL. .amd_kernel_code_t -^^^^^^^^^^^^^^^^^^ +++++++++++++++++++ This directive marks the beginning of a list of key / value pairs that are used to specify the amd_kernel_code_t object that will be emitted by the assembler. @@ -403,3 +3739,25 @@ Here is an example of a minimal amd_kernel_code_t specification: s_endpgm .Lfunc_end0: .size hello_world, .Lfunc_end0-hello_world + +Additional Documentation +======================== + +.. [AMD-R6xx] `AMD R6xx shader ISA `__ +.. [AMD-R7xx] `AMD R7xx shader ISA `__ +.. [AMD-Evergreen] `AMD Evergreen shader ISA `__ +.. [AMD-Cayman-Trinity] `AMD Cayman/Trinity shader ISA `__ +.. [AMD-Souther-Islands] `AMD Southern Islands Series ISA `__ +.. [AMD-Sea-Islands] `AMD Sea Islands Series ISA `_ +.. [AMD-Volcanic-Islands] `AMD GCN3 Instruction Set Architecture `__ +.. [AMD-OpenCL_Programming-Guide] `AMD Accelerated Parallel Processing OpenCL Programming Guide `_ +.. [AMD-APP-SDK] `AMD Accelerated Parallel Processing APP SDK Documentation `__ +.. [AMD-ROCm] `ROCm: Open Platform for Development, Discovery and Education Around GPU Computing `__ +.. [AMD-ROCm-github] `ROCm github `__ +.. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ +.. [ELF] `Executable and Linkable Format (ELF) `__ +.. [DWARF] `DWARF Debugging Information Format `__ +.. [YAML] `YAML Ain’t Markup Language (YAMLâ„¢) Version 1.2 `__ +.. [OpenCL] `The OpenCL Specification Version 2.0 `__ +.. [HRF] `Heterogeneous-race-free Memory Models `__ +.. [AMD-AMDGPU-Compute-Application-Binary-Interface] `AMDGPU Compute Application Binary Interface `__ diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index 106fc8456f61..bcdc72283566 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -2642,59 +2642,6 @@ to ensure valid register usage and operand types. The AMDGPU backend ------------------ -The AMDGPU code generator lives in the lib/Target/AMDGPU directory, and is an -open source native AMD GCN ISA code generator. - -Target triples supported -^^^^^^^^^^^^^^^^^^^^^^^^ - -The following are the known target triples that are supported by the AMDGPU -backend. - -* **amdgcn--** --- AMD GCN GPUs (AMDGPU.7.0.0+) -* **amdgcn--amdhsa** --- AMD GCN GPUs (AMDGPU.7.0.0+) with HSA support -* **r600--** --- AMD GPUs HD2XXX-HD6XXX - -Relocations -^^^^^^^^^^^ - -Supported relocatable fields are: - -* **word32** --- This specifies a 32-bit field occupying 4 bytes with arbitrary - byte alignment. These values use the same byte order as other word values in - the AMD GPU architecture -* **word64** --- This specifies a 64-bit field occupying 8 bytes with arbitrary - byte alignment. These values use the same byte order as other word values in - the AMD GPU architecture - -Following notations are used for specifying relocation calculations: - -* **A** --- Represents the addend used to compute the value of the relocatable - field -* **G** --- Represents the offset into the global offset table at which the - relocation entry’s symbol will reside during execution. -* **GOT** --- Represents the address of the global offset table. -* **P** --- Represents the place (section offset or address) of the storage unit - being relocated (computed using ``r_offset``) -* **S** --- Represents the value of the symbol whose index resides in the - relocation entry - -AMDGPU Backend generates *Elf64_Rela* relocation records with the following -supported relocation types: - - ========================== ===== ========== ============================== - Relocation type Value Field Calculation - ========================== ===== ========== ============================== - ``R_AMDGPU_NONE`` 0 ``none`` ``none`` - ``R_AMDGPU_ABS32_LO`` 1 ``word32`` (S + A) & 0xFFFFFFFF - ``R_AMDGPU_ABS32_HI`` 2 ``word32`` (S + A) >> 32 - ``R_AMDGPU_ABS64`` 3 ``word64`` S + A - ``R_AMDGPU_REL32`` 4 ``word32`` S + A - P - ``R_AMDGPU_REL64`` 5 ``word64`` S + A - P - ``R_AMDGPU_ABS32`` 6 ``word32`` S + A - ``R_AMDGPU_GOTPCREL`` 7 ``word32`` G + GOT + A - P - ``R_AMDGPU_GOTPCREL32_LO`` 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF - ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 - ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF - ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 - ========================== ===== ========== ============================== +The AMDGPU code generator lives in the ``lib/Target/AMDGPU`` +directory. This code generator is capable of targeting a variety of +AMD GPU processors. Refer to :doc:`AMDGPUUsage` for more information. diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index 8ce999033b7f..24375fb70d4e 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -72,16 +72,7 @@ Other documents, collections, notes AMDGPU ------ -* `AMD R6xx shader ISA `_ -* `AMD R7xx shader ISA `_ -* `AMD Evergreen shader ISA `_ -* `AMD Cayman/Trinity shader ISA `_ -* `AMD Southern Islands Series ISA `_ -* `AMD Sea Islands Series ISA `_ -* `AMD GCN3 Instruction Set Architecture `__ -* `AMD GPU Programming Guide `_ -* `AMD Compute Resources `_ -* `AMDGPU Compute Application Binary Interface `__ +Refer to :doc:`AMDGPUUsage` for additional documentation. RISC-V ------ diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 2e339183ef11..e063f6bd35fe 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -6691,15 +6691,14 @@ Semantics: The value produced is ``op1`` \* 2\ :sup:`op2` mod 2\ :sup:`n`, where ``n`` is the width of the result. If ``op2`` is (statically or dynamically) equal to or larger than the number of bits in -``op1``, the result is undefined. If the arguments are vectors, each -vector element of ``op1`` is shifted by the corresponding shift amount -in ``op2``. +``op1``, this instruction returns a :ref:`poison value `. +If the arguments are vectors, each vector element of ``op1`` is shifted +by the corresponding shift amount in ``op2``. -If the ``nuw`` keyword is present, then the shift produces a :ref:`poison -value ` if it shifts out any non-zero bits. If the -``nsw`` keyword is present, then the shift produces a :ref:`poison -value ` if it shifts out any bits that disagree with the -resultant sign bit. +If the ``nuw`` keyword is present, then the shift produces a poison +value if it shifts out any non-zero bits. +If the ``nsw`` keyword is present, then the shift produces a poison +value it shifts out any bits that disagree with the resultant sign bit. Example: """""""" @@ -6742,13 +6741,12 @@ Semantics: This instruction always performs a logical shift right operation. The most significant bits of the result will be filled with zero bits after the shift. If ``op2`` is (statically or dynamically) equal to or larger -than the number of bits in ``op1``, the result is undefined. If the -arguments are vectors, each vector element of ``op1`` is shifted by the -corresponding shift amount in ``op2``. +than the number of bits in ``op1``, this instruction returns a :ref:`poison +value `. If the arguments are vectors, each vector element +of ``op1`` is shifted by the corresponding shift amount in ``op2``. If the ``exact`` keyword is present, the result value of the ``lshr`` is -a :ref:`poison value ` if any of the bits shifted out are -non-zero. +a poison value if any of the bits shifted out are non-zero. Example: """""""" @@ -6793,13 +6791,12 @@ Semantics: This instruction always performs an arithmetic shift right operation, The most significant bits of the result will be filled with the sign bit of ``op1``. If ``op2`` is (statically or dynamically) equal to or larger -than the number of bits in ``op1``, the result is undefined. If the -arguments are vectors, each vector element of ``op1`` is shifted by the -corresponding shift amount in ``op2``. +than the number of bits in ``op1``, this instruction returns a :ref:`poison +value `. If the arguments are vectors, each vector element +of ``op1`` is shifted by the corresponding shift amount in ``op2``. If the ``exact`` keyword is present, the result value of the ``ashr`` is -a :ref:`poison value ` if any of the bits shifted out are -non-zero. +a poison value if any of the bits shifted out are non-zero. Example: """""""" diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index bc35e62189a2..95025fb91c72 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -43,6 +43,17 @@ Non-comprehensive list of changes in this release * LLVM's ``WeakVH`` has been renamed to ``WeakTrackingVH`` and a new ``WeakVH`` has been introduced. The new ``WeakVH`` nulls itself out on deletion, but does not track values across RAUW. + +* A new library named ``BinaryFormat`` has been created which holds a collection + of code which previously lived in ``Support``. This includes the + ``file_magic`` structure and ``identify_magic`` functions, as well as all the + structure and type definitions for DWARF, ELF, COFF, WASM, and MachO file + formats. + +* The tool ``llvm-pdbdump`` has been renamed ``llvm-pdbutil`` to better reflect + its nature as a general purpose PDB manipulation / diagnostics tool that does + more than just dumping contents. + * ... next change ... diff --git a/docs/index.rst b/docs/index.rst index 220df1566bd5..54b608236530 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -360,10 +360,10 @@ For API clients and LLVM developers. Answers some questions about the new Attributes infrastructure. :doc:`NVPTXUsage` - This document describes using the NVPTX back-end to compile GPU kernels. + This document describes using the NVPTX backend to compile GPU kernels. :doc:`AMDGPUUsage` - This document describes how to use the AMDGPU back-end. + This document describes using the AMDGPU backend to compile GPU kernels. :doc:`StackMaps` LLVM support for mapping instruction addresses to the location of diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index a8b82e1da778..d4c2a8cc5ad9 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -49,7 +49,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Verifier.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" @@ -59,7 +59,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 0a1d8faf99b7..22cef23007c3 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -1039,6 +1039,20 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy); */ LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty); +/** + * Returns type's subtypes + * + * @see llvm::Type::subtypes() + */ +void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr); + +/** + * Return the number of types in the derived type. + * + * @see llvm::Type::getNumContainedTypes() + */ +unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp); + /** * Create a fixed size array type that refers to a specific type. * diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index b72a91a8b137..51830fe139c6 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -19,9 +19,9 @@ #ifndef LLVM_C_EXECUTIONENGINE_H #define LLVM_C_EXECUTIONENGINE_H -#include "llvm-c/Types.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h index 735d1fbc78cc..6de184ccab49 100644 --- a/include/llvm-c/Support.h +++ b/include/llvm-c/Support.h @@ -14,8 +14,8 @@ #ifndef LLVM_C_SUPPORT_H #define LLVM_C_SUPPORT_H -#include "llvm/Support/DataTypes.h" #include "llvm-c/Types.h" +#include "llvm/Support/DataTypes.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h index 1d1f61f1a5b4..f4f7f7698c45 100644 --- a/include/llvm-c/TargetMachine.h +++ b/include/llvm-c/TargetMachine.h @@ -19,8 +19,8 @@ #ifndef LLVM_C_TARGETMACHINE_H #define LLVM_C_TARGETMACHINE_H -#include "llvm-c/Types.h" #include "llvm-c/Target.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index fe75e25bd8d2..ef9c66d2d700 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -392,6 +392,11 @@ public: /// not. bool isNullValue() const { return !*this; } + /// \brief Determine if this is a value of 1. + /// + /// This checks to see if the value of this APInt is one. + bool isOneValue() const { return getActiveBits() == 1; } + /// \brief Determine if this is the largest unsigned value. /// /// This checks to see if the value of this APInt is the maximum unsigned diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h index 2c88c4271b48..68149d9e3bf5 100644 --- a/include/llvm/ADT/GraphTraits.h +++ b/include/llvm/ADT/GraphTraits.h @@ -52,7 +52,6 @@ struct GraphTraits { // Return total number of nodes in the graph // - // If anyone tries to use this class without having an appropriate // specialization, make an error. If you get this error, it's because you // need to include the appropriate specialization of GraphTraits<> for your diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h index 0724a28306a0..9c9bcb81f76b 100644 --- a/include/llvm/ADT/ImmutableSet.h +++ b/include/llvm/ADT/ImmutableSet.h @@ -16,16 +16,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include -#include -#include #include +#include #include #include +#include namespace llvm { diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index 9eb15524c0f3..7ce70ebc8ce0 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -19,8 +19,8 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include -#include #include +#include namespace llvm { diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index a179d29956b1..dc8a9b6e78b2 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -17,9 +17,9 @@ #define LLVM_ADT_POSTORDERITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/iterator_range.h" #include #include #include diff --git a/include/llvm/ADT/PriorityWorklist.h b/include/llvm/ADT/PriorityWorklist.h index 35891e931801..aa531f3337d9 100644 --- a/include/llvm/ADT/PriorityWorklist.h +++ b/include/llvm/ADT/PriorityWorklist.h @@ -17,8 +17,8 @@ #define LLVM_ADT_PRIORITYWORKLIST_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include #include diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h index 734a58f87da2..784a58dc002f 100644 --- a/include/llvm/ADT/SCCIterator.h +++ b/include/llvm/ADT/SCCIterator.h @@ -232,16 +232,6 @@ template scc_iterator scc_end(const T &G) { return scc_iterator::end(G); } -/// \brief Construct the begin iterator for a deduced graph type T's Inverse. -template scc_iterator> scc_begin(const Inverse &G) { - return scc_iterator>::begin(G); -} - -/// \brief Construct the end iterator for a deduced graph type T's Inverse. -template scc_iterator> scc_end(const Inverse &G) { - return scc_iterator>::end(G); -} - } // end namespace llvm #endif // LLVM_ADT_SCCITERATOR_H diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index a0b380b237da..a2ad74b1e04a 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -21,8 +21,8 @@ #include "llvm/Support/type_traits.h" #include #include -#include #include +#include #include #include #include @@ -31,8 +31,12 @@ namespace llvm { #if LLVM_ENABLE_ABI_BREAKING_CHECKS template struct ReverseIterate { static bool value; }; +#if LLVM_ENABLE_REVERSE_ITERATION +template bool ReverseIterate::value = true; +#else template bool ReverseIterate::value = false; #endif +#endif /// SmallPtrSetImplBase - This is the common code shared among all the /// SmallPtrSet<>'s, which is almost everything. SmallPtrSet has two modes, one diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index 35c255002001..bf2a62f43aff 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -388,7 +388,10 @@ public: void swap(SmallVectorImpl &RHS); /// Add the specified range to the end of the SmallVector. - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> void append(in_iter in_start, in_iter in_end) { size_type NumInputs = std::distance(in_start, in_end); // Grow allocated space if needed. @@ -426,7 +429,11 @@ public: std::uninitialized_fill(this->begin(), this->end(), Elt); } - template void assign(in_iter in_start, in_iter in_end) { + template ::iterator_category, + std::input_iterator_tag>::value>::type> + void assign(in_iter in_start, in_iter in_end) { clear(); append(in_start, in_end); } @@ -579,7 +586,10 @@ public: return I; } - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> iterator insert(iterator I, ItTy From, ItTy To) { // Convert iterator to elt# to avoid invalidating iterator when we reserve() size_t InsertElt = I - this->begin(); @@ -860,7 +870,10 @@ public: this->assign(Size, Value); } - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> SmallVector(ItTy S, ItTy E) : SmallVectorImpl(N) { this->append(S, E); } diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h index b3a413aa3aa5..c91e0d70f65a 100644 --- a/include/llvm/ADT/SparseMultiSet.h +++ b/include/llvm/ADT/SparseMultiSet.h @@ -21,9 +21,9 @@ #ifndef LLVM_ADT_SPARSEMULTISET_H #define LLVM_ADT_SPARSEMULTISET_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/STLExtras.h" #include #include #include diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index e22a3f688c40..bbea8619a673 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -15,11 +15,11 @@ #define LLVM_ADT_STRINGEXTRAS_H #include "llvm/ADT/StringRef.h" -#include #include #include #include #include +#include #include #include diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 4b25f56432df..f6c93a858db1 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include namespace llvm { diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h index 3dd679bd9b79..3cbf6198eb60 100644 --- a/include/llvm/ADT/iterator_range.h +++ b/include/llvm/ADT/iterator_range.h @@ -19,8 +19,8 @@ #ifndef LLVM_ADT_ITERATOR_RANGE_H #define LLVM_ADT_ITERATOR_RANGE_H -#include #include +#include namespace llvm { diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 1b8b9751faa1..e00ae4f3beec 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -38,11 +38,11 @@ #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H #define LLVM_ANALYSIS_ALIASANALYSIS_H +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { class BasicAAResult; diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index 04c6fd70e07f..58d72afdc1b6 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -21,8 +21,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 6a876679543d..94d3d4de6c9d 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -26,6 +26,7 @@ namespace llvm { class LoopInfo; +class TargetLibraryInfo; class raw_ostream; /// \brief Analysis providing branch probability information. @@ -43,8 +44,9 @@ class raw_ostream; class BranchProbabilityInfo { public: BranchProbabilityInfo() {} - BranchProbabilityInfo(const Function &F, const LoopInfo &LI) { - calculate(F, LI); + BranchProbabilityInfo(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr) { + calculate(F, LI, TLI); } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) @@ -116,7 +118,8 @@ public: return IsLikely ? LikelyProb : LikelyProb.getCompl(); } - void calculate(const Function &F, const LoopInfo &LI); + void calculate(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr); /// Forget analysis results for the given basic block. void eraseBlock(const BasicBlock *BB); @@ -171,7 +174,7 @@ private: bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); - bool calcZeroHeuristics(const BasicBlock *BB); + bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool calcFloatingPointHeuristics(const BasicBlock *BB); bool calcInvokeHeuristics(const BasicBlock *BB); }; diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index ff6ca1959153..42034741b8e3 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -31,6 +31,7 @@ class DataLayout; class Function; class GlobalValue; class Instruction; +class ImmutableCallSite; class TargetLibraryInfo; class Type; @@ -125,11 +126,12 @@ Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. -bool canConstantFoldCallTo(const Function *F); +bool canConstantFoldCallTo(ImmutableCallSite CS, const Function *F); /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. -Constant *ConstantFoldCall(Function *F, ArrayRef Operands, +Constant *ConstantFoldCall(ImmutableCallSite CS, Function *F, + ArrayRef Operands, const TargetLibraryInfo *TLI = nullptr); /// \brief Check whether the given call has no side-effects. diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h index e5fd8a0007fe..e52c66f361c3 100644 --- a/include/llvm/Analysis/DemandedBits.h +++ b/include/llvm/Analysis/DemandedBits.h @@ -22,11 +22,11 @@ #ifndef LLVM_ANALYSIS_DEMANDED_BITS_H #define LLVM_ANALYSIS_DEMANDED_BITS_H -#include "llvm/Pass.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index d91d08a524dc..ce0b7895f253 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -14,8 +14,8 @@ #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include #include diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index ca48b5483512..be0f32ef444a 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -41,6 +41,7 @@ template class ArrayRef; class AssumptionCache; class DominatorTree; class Instruction; +class ImmutableCallSite; class DataLayout; class FastMathFlags; struct LoopStandardAnalysisResults; @@ -194,11 +195,12 @@ Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// Given a function and iterators over arguments, fold the result or return /// null. -Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, +Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const SimplifyQuery &Q); /// Given a function and set of arguments, fold the result or return null. -Value *SimplifyCall(Value *V, ArrayRef Args, const SimplifyQuery &Q); +Value *SimplifyCall(ImmutableCallSite CS, Value *V, ArrayRef Args, + const SimplifyQuery &Q); /// See if we can compute a simplified version of this instruction. If not, /// return null. diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h index 067d7ebfd1f5..e1d404b1ada2 100644 --- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -24,6 +24,7 @@ namespace llvm { class AnalysisUsage; class Function; class LoopInfo; +class TargetLibraryInfo; /// \brief This is an alternative analysis pass to /// BranchProbabilityInfoWrapperPass. The difference is that with this pass the @@ -55,14 +56,15 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { /// analysis without paying for the overhead if BPI doesn't end up being used. class LazyBranchProbabilityInfo { public: - LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI) - : Calculated(false), F(F), LI(LI) {} + LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI, + const TargetLibraryInfo *TLI) + : Calculated(false), F(F), LI(LI), TLI(TLI) {} /// Retrieve the BPI with the branch probabilities computed. BranchProbabilityInfo &getCalculated() { if (!Calculated) { assert(F && LI && "call setAnalysis"); - BPI.calculate(*F, *LI); + BPI.calculate(*F, *LI, TLI); Calculated = true; } return BPI; @@ -77,6 +79,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { bool Calculated; const Function *F; const LoopInfo *LI; + const TargetLibraryInfo *TLI; }; std::unique_ptr LBPI; diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index 49e088e533dc..7b178fc7bcc2 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -100,8 +100,11 @@ public: /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); - /// Print the \LazyValueInfoCache. - void printCache(Function &F, raw_ostream &OS); + /// Print the \LazyValueInfo Analysis. + /// We pass in the DTree that is required for identifying which basic blocks + /// we can solve/print for, in the LVIPrinter. The DT is optional + /// in LVI, so we need to pass it here as an argument. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS); // For old PM pass. Delete once LazyValueInfoWrapperPass is gone. void releaseMemory(); diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 249fa572c024..6ff4335f1ad5 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -17,8 +17,8 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index a401887016c9..1dbbf6cc6add 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -15,8 +15,8 @@ #define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/PointerEmbeddedInt.h" +#include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h index d30eeeaa95b6..b36b2f01dac6 100644 --- a/include/llvm/Analysis/MemorySSAUpdater.h +++ b/include/llvm/Analysis/MemorySSAUpdater.h @@ -34,6 +34,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" @@ -45,7 +46,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Analysis/MemorySSA.h" namespace llvm { diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 5f4d8ecbbfbb..e80412a30564 100644 --- a/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -23,8 +23,8 @@ #ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H #define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/Passes.h" diff --git a/include/llvm/Analysis/ObjCARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h index 3b37ddf78f58..02ff03578238 100644 --- a/include/llvm/Analysis/ObjCARCInstKind.h +++ b/include/llvm/Analysis/ObjCARCInstKind.h @@ -10,8 +10,8 @@ #ifndef LLVM_ANALYSIS_OBJCARCINSTKIND_H #define LLVM_ANALYSIS_OBJCARCINSTKIND_H -#include "llvm/IR/Instructions.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" namespace llvm { namespace objcarc { diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h index b73ad95278a0..51c92121c8f0 100644 --- a/include/llvm/Analysis/ScalarEvolutionNormalization.h +++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -36,8 +36,8 @@ #ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H #define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" namespace llvm { diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 0e3bdaa11c9a..d75e7833279b 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -239,6 +240,13 @@ public: return Impl->getLibFunc(FDecl, F); } + /// If a callsite does not have the 'nobuiltin' attribute, return if the + /// called function is a known library function and set F to that function. + bool getLibFunc(ImmutableCallSite CS, LibFunc &F) const { + return !CS.isNoBuiltin() && CS.getCalledFunction() && + getLibFunc(*(CS.getCalledFunction()), F); + } + /// Tests whether a library function is available. bool has(LibFunc F) const { return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable; diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 7211508e975a..cd8c2cd24244 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -267,6 +267,19 @@ public: /// incurs significant execution cost. bool isLoweredToCall(const Function *F) const; + struct LSRCost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal. + unsigned Insns; + unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + unsigned ScaleCost; + }; + /// Parameters that control the generic loop unrolling transformation. struct UnrollingPreferences { /// The cost threshold for the unrolled loop. Should be relative to the @@ -385,6 +398,10 @@ public: bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0) const; + /// \brief Return true if LSR cost of C1 is lower than C1. + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) const; + /// \brief Return true if the target supports masked load/store /// AVX2 and AVX-512 targets allow masks for consecutive load and store bool isLegalMaskedStore(Type *DataType) const; @@ -705,6 +722,10 @@ public: /// if false is returned. bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; + /// \returns The maximum element size, in bytes, for an element + /// unordered-atomic memory intrinsic. + unsigned getAtomicMemIntrinsicMaxElementSize() const; + /// \returns A value which is the result of the given memory intrinsic. New /// instructions may be created to extract the result from the given intrinsic /// memory operation. Returns nullptr if the target cannot create a result @@ -809,6 +830,8 @@ public: int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; + virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) = 0; virtual bool isLegalMaskedStore(Type *DataType) = 0; virtual bool isLegalMaskedLoad(Type *DataType) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; @@ -904,6 +927,7 @@ public: virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) = 0; virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) = 0; + virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; virtual bool areInlineCompatible(const Function *Caller, @@ -996,6 +1020,10 @@ public: return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) override { + return Impl.isLSRCostLess(C1, C2); + } bool isLegalMaskedStore(Type *DataType) override { return Impl.isLegalMaskedStore(DataType); } @@ -1201,6 +1229,9 @@ public: MemIntrinsicInfo &Info) override { return Impl.getTgtMemIntrinsic(Inst, Info); } + unsigned getAtomicMemIntrinsicMaxElementSize() const override { + return Impl.getAtomicMemIntrinsicMaxElementSize(); + } Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index d73a60eba850..72de7c12eb3e 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -17,13 +17,13 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/VectorUtils.h" namespace llvm { @@ -229,6 +229,13 @@ public: return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } + bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { + return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); + } + bool isLegalMaskedStore(Type *DataType) { return false; } bool isLegalMaskedLoad(Type *DataType) { return false; } @@ -420,6 +427,15 @@ public: return false; } + unsigned getAtomicMemIntrinsicMaxElementSize() const { + // Note for overrides: You must ensure for all element unordered-atomic + // memory intrinsics that all power-of-2 element sizes up to, and + // including, the return value of this method have a corresponding + // runtime lib call. These runtime lib call definitions can be found + // in RuntimeLibcalls.h + return 0; + } + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) { return nullptr; diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h new file mode 100644 index 000000000000..5171c72b9e67 --- /dev/null +++ b/include/llvm/BinaryFormat/COFF.h @@ -0,0 +1,713 @@ +//===-- llvm/BinaryFormat/COFF.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an definitions used in Windows COFF Files. +// +// Structures and enums defined within this file where created using +// information from Microsoft's publicly available PE/COFF format document: +// +// Microsoft Portable Executable and Common Object File Format Specification +// Revision 8.1 - February 15, 2008 +// +// As of 5/2/2010, hosted by Microsoft at: +// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_COFF_H +#define LLVM_BINARYFORMAT_COFF_H + +#include "llvm/Support/DataTypes.h" +#include +#include + +namespace llvm { +namespace COFF { + +// The maximum number of sections that a COFF object can have (inclusive). +const int32_t MaxNumberOfSections16 = 65279; + +// The PE signature bytes that follows the DOS stub header. +static const char PEMagic[] = {'P', 'E', '\0', '\0'}; + +static const char BigObjMagic[] = { + '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b', + '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8', +}; + +static const char ClGlObjMagic[] = { + '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d', + '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2', +}; + +// Sizes in bytes of various things in the COFF format. +enum { + Header16Size = 20, + Header32Size = 56, + NameSize = 8, + Symbol16Size = 18, + Symbol32Size = 20, + SectionSize = 40, + RelocationSize = 10 +}; + +struct header { + uint16_t Machine; + int32_t NumberOfSections; + uint32_t TimeDateStamp; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; + uint16_t SizeOfOptionalHeader; + uint16_t Characteristics; +}; + +struct BigObjHeader { + enum : uint16_t { MinBigObjectVersion = 2 }; + + uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). + uint16_t Sig2; ///< Must be 0xFFFF. + uint16_t Version; + uint16_t Machine; + uint32_t TimeDateStamp; + uint8_t UUID[16]; + uint32_t unused1; + uint32_t unused2; + uint32_t unused3; + uint32_t unused4; + uint32_t NumberOfSections; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; +}; + +enum MachineTypes { + MT_Invalid = 0xffff, + + IMAGE_FILE_MACHINE_UNKNOWN = 0x0, + IMAGE_FILE_MACHINE_AM33 = 0x13, + IMAGE_FILE_MACHINE_AMD64 = 0x8664, + IMAGE_FILE_MACHINE_ARM = 0x1C0, + IMAGE_FILE_MACHINE_ARMNT = 0x1C4, + IMAGE_FILE_MACHINE_ARM64 = 0xAA64, + IMAGE_FILE_MACHINE_EBC = 0xEBC, + IMAGE_FILE_MACHINE_I386 = 0x14C, + IMAGE_FILE_MACHINE_IA64 = 0x200, + IMAGE_FILE_MACHINE_M32R = 0x9041, + IMAGE_FILE_MACHINE_MIPS16 = 0x266, + IMAGE_FILE_MACHINE_MIPSFPU = 0x366, + IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466, + IMAGE_FILE_MACHINE_POWERPC = 0x1F0, + IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1, + IMAGE_FILE_MACHINE_R4000 = 0x166, + IMAGE_FILE_MACHINE_SH3 = 0x1A2, + IMAGE_FILE_MACHINE_SH3DSP = 0x1A3, + IMAGE_FILE_MACHINE_SH4 = 0x1A6, + IMAGE_FILE_MACHINE_SH5 = 0x1A8, + IMAGE_FILE_MACHINE_THUMB = 0x1C2, + IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 +}; + +enum Characteristics { + C_Invalid = 0, + + /// The file does not contain base relocations and must be loaded at its + /// preferred base. If this cannot be done, the loader will error. + IMAGE_FILE_RELOCS_STRIPPED = 0x0001, + /// The file is valid and can be run. + IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002, + /// COFF line numbers have been stripped. This is deprecated and should be + /// 0. + IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004, + /// COFF symbol table entries for local symbols have been removed. This is + /// deprecated and should be 0. + IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008, + /// Aggressively trim working set. This is deprecated and must be 0. + IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010, + /// Image can handle > 2GiB addresses. + IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020, + /// Little endian: the LSB precedes the MSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_LO = 0x0080, + /// Machine is based on a 32bit word architecture. + IMAGE_FILE_32BIT_MACHINE = 0x0100, + /// Debugging info has been removed. + IMAGE_FILE_DEBUG_STRIPPED = 0x0200, + /// If the image is on removable media, fully load it and copy it to swap. + IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400, + /// If the image is on network media, fully load it and copy it to swap. + IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800, + /// The image file is a system file, not a user program. + IMAGE_FILE_SYSTEM = 0x1000, + /// The image file is a DLL. + IMAGE_FILE_DLL = 0x2000, + /// This file should only be run on a uniprocessor machine. + IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000, + /// Big endian: the MSB precedes the LSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_HI = 0x8000 +}; + +enum ResourceTypeID { + RID_Cursor = 1, + RID_Bitmap = 2, + RID_Icon = 3, + RID_Menu = 4, + RID_Dialog = 5, + RID_String = 6, + RID_FontDir = 7, + RID_Font = 8, + RID_Accelerator = 9, + RID_RCData = 10, + RID_MessageTable = 11, + RID_Group_Cursor = 12, + RID_Group_Icon = 14, + RID_Version = 16, + RID_DLGInclude = 17, + RID_PlugPlay = 19, + RID_VXD = 20, + RID_AniCursor = 21, + RID_AniIcon = 22, + RID_HTML = 23, + RID_Manifest = 24, +}; + +struct symbol { + char Name[NameSize]; + uint32_t Value; + int32_t SectionNumber; + uint16_t Type; + uint8_t StorageClass; + uint8_t NumberOfAuxSymbols; +}; + +enum SymbolSectionNumber : int32_t { + IMAGE_SYM_DEBUG = -2, + IMAGE_SYM_ABSOLUTE = -1, + IMAGE_SYM_UNDEFINED = 0 +}; + +/// Storage class tells where and what the symbol represents +enum SymbolStorageClass { + SSC_Invalid = 0xff, + + IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function + IMAGE_SYM_CLASS_NULL = 0, ///< No symbol + IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable + IMAGE_SYM_CLASS_EXTERNAL = 2, ///< External symbol + IMAGE_SYM_CLASS_STATIC = 3, ///< Static + IMAGE_SYM_CLASS_REGISTER = 4, ///< Register variable + IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, ///< External definition + IMAGE_SYM_CLASS_LABEL = 6, ///< Label + IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, ///< Undefined label + IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, ///< Member of structure + IMAGE_SYM_CLASS_ARGUMENT = 9, ///< Function argument + IMAGE_SYM_CLASS_STRUCT_TAG = 10, ///< Structure tag + IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, ///< Member of union + IMAGE_SYM_CLASS_UNION_TAG = 12, ///< Union tag + IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, ///< Type definition + IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static + IMAGE_SYM_CLASS_ENUM_TAG = 15, ///< Enumeration tag + IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, ///< Member of enumeration + IMAGE_SYM_CLASS_REGISTER_PARAM = 17, ///< Register parameter + IMAGE_SYM_CLASS_BIT_FIELD = 18, ///< Bit field + /// ".bb" or ".eb" - beginning or end of block + IMAGE_SYM_CLASS_BLOCK = 100, + /// ".bf" or ".ef" - beginning or end of function + IMAGE_SYM_CLASS_FUNCTION = 101, + IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure + IMAGE_SYM_CLASS_FILE = 103, ///< File name + /// Line number, reformatted as symbol + IMAGE_SYM_CLASS_SECTION = 104, + IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Duplicate tag + /// External symbol in dmert public lib + IMAGE_SYM_CLASS_CLR_TOKEN = 107 +}; + +enum SymbolBaseType { + IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type. + IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions. + IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte). + IMAGE_SYM_TYPE_SHORT = 3, ///< A 2-byte signed integer. + IMAGE_SYM_TYPE_INT = 4, ///< A natural integer type on the target. + IMAGE_SYM_TYPE_LONG = 5, ///< A 4-byte signed integer. + IMAGE_SYM_TYPE_FLOAT = 6, ///< A 4-byte floating-point number. + IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number. + IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure. + IMAGE_SYM_TYPE_UNION = 9, ///< An union. + IMAGE_SYM_TYPE_ENUM = 10, ///< An enumerated type. + IMAGE_SYM_TYPE_MOE = 11, ///< A member of enumeration (a specific value). + IMAGE_SYM_TYPE_BYTE = 12, ///< A byte; unsigned 1-byte integer. + IMAGE_SYM_TYPE_WORD = 13, ///< A word; unsigned 2-byte integer. + IMAGE_SYM_TYPE_UINT = 14, ///< An unsigned integer of natural size. + IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer. +}; + +enum SymbolComplexType { + IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable. + IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type. + IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type. + IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type. + + /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT)) + SCT_COMPLEX_TYPE_SHIFT = 4 +}; + +enum AuxSymbolType { IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1 }; + +struct section { + char Name[NameSize]; + uint32_t VirtualSize; + uint32_t VirtualAddress; + uint32_t SizeOfRawData; + uint32_t PointerToRawData; + uint32_t PointerToRelocations; + uint32_t PointerToLineNumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLineNumbers; + uint32_t Characteristics; +}; + +enum SectionCharacteristics : uint32_t { + SC_Invalid = 0xffffffff, + + IMAGE_SCN_TYPE_NOLOAD = 0x00000002, + IMAGE_SCN_TYPE_NO_PAD = 0x00000008, + IMAGE_SCN_CNT_CODE = 0x00000020, + IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, + IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, + IMAGE_SCN_LNK_OTHER = 0x00000100, + IMAGE_SCN_LNK_INFO = 0x00000200, + IMAGE_SCN_LNK_REMOVE = 0x00000800, + IMAGE_SCN_LNK_COMDAT = 0x00001000, + IMAGE_SCN_GPREL = 0x00008000, + IMAGE_SCN_MEM_PURGEABLE = 0x00020000, + IMAGE_SCN_MEM_16BIT = 0x00020000, + IMAGE_SCN_MEM_LOCKED = 0x00040000, + IMAGE_SCN_MEM_PRELOAD = 0x00080000, + IMAGE_SCN_ALIGN_1BYTES = 0x00100000, + IMAGE_SCN_ALIGN_2BYTES = 0x00200000, + IMAGE_SCN_ALIGN_4BYTES = 0x00300000, + IMAGE_SCN_ALIGN_8BYTES = 0x00400000, + IMAGE_SCN_ALIGN_16BYTES = 0x00500000, + IMAGE_SCN_ALIGN_32BYTES = 0x00600000, + IMAGE_SCN_ALIGN_64BYTES = 0x00700000, + IMAGE_SCN_ALIGN_128BYTES = 0x00800000, + IMAGE_SCN_ALIGN_256BYTES = 0x00900000, + IMAGE_SCN_ALIGN_512BYTES = 0x00A00000, + IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000, + IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, + IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, + IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, + IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, + IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, + IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, + IMAGE_SCN_MEM_SHARED = 0x10000000, + IMAGE_SCN_MEM_EXECUTE = 0x20000000, + IMAGE_SCN_MEM_READ = 0x40000000, + IMAGE_SCN_MEM_WRITE = 0x80000000 +}; + +struct relocation { + uint32_t VirtualAddress; + uint32_t SymbolTableIndex; + uint16_t Type; +}; + +enum RelocationTypeI386 { + IMAGE_REL_I386_ABSOLUTE = 0x0000, + IMAGE_REL_I386_DIR16 = 0x0001, + IMAGE_REL_I386_REL16 = 0x0002, + IMAGE_REL_I386_DIR32 = 0x0006, + IMAGE_REL_I386_DIR32NB = 0x0007, + IMAGE_REL_I386_SEG12 = 0x0009, + IMAGE_REL_I386_SECTION = 0x000A, + IMAGE_REL_I386_SECREL = 0x000B, + IMAGE_REL_I386_TOKEN = 0x000C, + IMAGE_REL_I386_SECREL7 = 0x000D, + IMAGE_REL_I386_REL32 = 0x0014 +}; + +enum RelocationTypeAMD64 { + IMAGE_REL_AMD64_ABSOLUTE = 0x0000, + IMAGE_REL_AMD64_ADDR64 = 0x0001, + IMAGE_REL_AMD64_ADDR32 = 0x0002, + IMAGE_REL_AMD64_ADDR32NB = 0x0003, + IMAGE_REL_AMD64_REL32 = 0x0004, + IMAGE_REL_AMD64_REL32_1 = 0x0005, + IMAGE_REL_AMD64_REL32_2 = 0x0006, + IMAGE_REL_AMD64_REL32_3 = 0x0007, + IMAGE_REL_AMD64_REL32_4 = 0x0008, + IMAGE_REL_AMD64_REL32_5 = 0x0009, + IMAGE_REL_AMD64_SECTION = 0x000A, + IMAGE_REL_AMD64_SECREL = 0x000B, + IMAGE_REL_AMD64_SECREL7 = 0x000C, + IMAGE_REL_AMD64_TOKEN = 0x000D, + IMAGE_REL_AMD64_SREL32 = 0x000E, + IMAGE_REL_AMD64_PAIR = 0x000F, + IMAGE_REL_AMD64_SSPAN32 = 0x0010 +}; + +enum RelocationTypesARM { + IMAGE_REL_ARM_ABSOLUTE = 0x0000, + IMAGE_REL_ARM_ADDR32 = 0x0001, + IMAGE_REL_ARM_ADDR32NB = 0x0002, + IMAGE_REL_ARM_BRANCH24 = 0x0003, + IMAGE_REL_ARM_BRANCH11 = 0x0004, + IMAGE_REL_ARM_TOKEN = 0x0005, + IMAGE_REL_ARM_BLX24 = 0x0008, + IMAGE_REL_ARM_BLX11 = 0x0009, + IMAGE_REL_ARM_SECTION = 0x000E, + IMAGE_REL_ARM_SECREL = 0x000F, + IMAGE_REL_ARM_MOV32A = 0x0010, + IMAGE_REL_ARM_MOV32T = 0x0011, + IMAGE_REL_ARM_BRANCH20T = 0x0012, + IMAGE_REL_ARM_BRANCH24T = 0x0014, + IMAGE_REL_ARM_BLX23T = 0x0015 +}; + +enum RelocationTypesARM64 { + IMAGE_REL_ARM64_ABSOLUTE = 0x0000, + IMAGE_REL_ARM64_ADDR32 = 0x0001, + IMAGE_REL_ARM64_ADDR32NB = 0x0002, + IMAGE_REL_ARM64_BRANCH26 = 0x0003, + IMAGE_REL_ARM64_PAGEBASE_REL2 = 0x0004, + IMAGE_REL_ARM64_REL21 = 0x0005, + IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006, + IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007, + IMAGE_REL_ARM64_SECREL = 0x0008, + IMAGE_REL_ARM64_SECREL_LOW12A = 0x0009, + IMAGE_REL_ARM64_SECREL_HIGH12A = 0x000A, + IMAGE_REL_ARM64_SECREL_LOW12L = 0x000B, + IMAGE_REL_ARM64_TOKEN = 0x000C, + IMAGE_REL_ARM64_SECTION = 0x000D, + IMAGE_REL_ARM64_ADDR64 = 0x000E, + IMAGE_REL_ARM64_BRANCH19 = 0x000F, + IMAGE_REL_ARM64_BRANCH14 = 0x0010, +}; + +enum COMDATType { + IMAGE_COMDAT_SELECT_NODUPLICATES = 1, + IMAGE_COMDAT_SELECT_ANY, + IMAGE_COMDAT_SELECT_SAME_SIZE, + IMAGE_COMDAT_SELECT_EXACT_MATCH, + IMAGE_COMDAT_SELECT_ASSOCIATIVE, + IMAGE_COMDAT_SELECT_LARGEST, + IMAGE_COMDAT_SELECT_NEWEST +}; + +// Auxiliary Symbol Formats +struct AuxiliaryFunctionDefinition { + uint32_t TagIndex; + uint32_t TotalSize; + uint32_t PointerToLinenumber; + uint32_t PointerToNextFunction; + char unused[2]; +}; + +struct AuxiliarybfAndefSymbol { + uint8_t unused1[4]; + uint16_t Linenumber; + uint8_t unused2[6]; + uint32_t PointerToNextFunction; + uint8_t unused3[2]; +}; + +struct AuxiliaryWeakExternal { + uint32_t TagIndex; + uint32_t Characteristics; + uint8_t unused[10]; +}; + +enum WeakExternalCharacteristics { + IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1, + IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2, + IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3 +}; + +struct AuxiliarySectionDefinition { + uint32_t Length; + uint16_t NumberOfRelocations; + uint16_t NumberOfLinenumbers; + uint32_t CheckSum; + uint32_t Number; + uint8_t Selection; + char unused; +}; + +struct AuxiliaryCLRToken { + uint8_t AuxType; + uint8_t unused1; + uint32_t SymbolTableIndex; + char unused2[12]; +}; + +union Auxiliary { + AuxiliaryFunctionDefinition FunctionDefinition; + AuxiliarybfAndefSymbol bfAndefSymbol; + AuxiliaryWeakExternal WeakExternal; + AuxiliarySectionDefinition SectionDefinition; +}; + +/// @brief The Import Directory Table. +/// +/// There is a single array of these and one entry per imported DLL. +struct ImportDirectoryTableEntry { + uint32_t ImportLookupTableRVA; + uint32_t TimeDateStamp; + uint32_t ForwarderChain; + uint32_t NameRVA; + uint32_t ImportAddressTableRVA; +}; + +/// @brief The PE32 Import Lookup Table. +/// +/// There is an array of these for each imported DLL. It represents either +/// the ordinal to import from the target DLL, or a name to lookup and import +/// from the target DLL. +/// +/// This also happens to be the same format used by the Import Address Table +/// when it is initially written out to the image. +struct ImportLookupTableEntry32 { + uint32_t data; + + /// @brief Is this entry specified by ordinal, or name? + bool isOrdinal() const { return data & 0x80000000; } + + /// @brief Get the ordinal value of this entry. isOrdinal must be true. + uint16_t getOrdinal() const { + assert(isOrdinal() && "ILT entry is not an ordinal!"); + return data & 0xFFFF; + } + + /// @brief Set the ordinal value and set isOrdinal to true. + void setOrdinal(uint16_t o) { + data = o; + data |= 0x80000000; + } + + /// @brief Get the Hint/Name entry RVA. isOrdinal must be false. + uint32_t getHintNameRVA() const { + assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!"); + return data; + } + + /// @brief Set the Hint/Name entry RVA and set isOrdinal to false. + void setHintNameRVA(uint32_t rva) { data = rva; } +}; + +/// @brief The DOS compatible header at the front of all PEs. +struct DOSHeader { + uint16_t Magic; + uint16_t UsedBytesInTheLastPage; + uint16_t FileSizeInPages; + uint16_t NumberOfRelocationItems; + uint16_t HeaderSizeInParagraphs; + uint16_t MinimumExtraParagraphs; + uint16_t MaximumExtraParagraphs; + uint16_t InitialRelativeSS; + uint16_t InitialSP; + uint16_t Checksum; + uint16_t InitialIP; + uint16_t InitialRelativeCS; + uint16_t AddressOfRelocationTable; + uint16_t OverlayNumber; + uint16_t Reserved[4]; + uint16_t OEMid; + uint16_t OEMinfo; + uint16_t Reserved2[10]; + uint32_t AddressOfNewExeHeader; +}; + +struct PE32Header { + enum { PE32 = 0x10b, PE32_PLUS = 0x20b }; + + uint16_t Magic; + uint8_t MajorLinkerVersion; + uint8_t MinorLinkerVersion; + uint32_t SizeOfCode; + uint32_t SizeOfInitializedData; + uint32_t SizeOfUninitializedData; + uint32_t AddressOfEntryPoint; // RVA + uint32_t BaseOfCode; // RVA + uint32_t BaseOfData; // RVA + uint32_t ImageBase; + uint32_t SectionAlignment; + uint32_t FileAlignment; + uint16_t MajorOperatingSystemVersion; + uint16_t MinorOperatingSystemVersion; + uint16_t MajorImageVersion; + uint16_t MinorImageVersion; + uint16_t MajorSubsystemVersion; + uint16_t MinorSubsystemVersion; + uint32_t Win32VersionValue; + uint32_t SizeOfImage; + uint32_t SizeOfHeaders; + uint32_t CheckSum; + uint16_t Subsystem; + // FIXME: This should be DllCharacteristics to match the COFF spec. + uint16_t DLLCharacteristics; + uint32_t SizeOfStackReserve; + uint32_t SizeOfStackCommit; + uint32_t SizeOfHeapReserve; + uint32_t SizeOfHeapCommit; + uint32_t LoaderFlags; + // FIXME: This should be NumberOfRvaAndSizes to match the COFF spec. + uint32_t NumberOfRvaAndSize; +}; + +struct DataDirectory { + uint32_t RelativeVirtualAddress; + uint32_t Size; +}; + +enum DataDirectoryIndex { + EXPORT_TABLE = 0, + IMPORT_TABLE, + RESOURCE_TABLE, + EXCEPTION_TABLE, + CERTIFICATE_TABLE, + BASE_RELOCATION_TABLE, + DEBUG_DIRECTORY, + ARCHITECTURE, + GLOBAL_PTR, + TLS_TABLE, + LOAD_CONFIG_TABLE, + BOUND_IMPORT, + IAT, + DELAY_IMPORT_DESCRIPTOR, + CLR_RUNTIME_HEADER, + + NUM_DATA_DIRECTORIES +}; + +enum WindowsSubsystem { + IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem. + IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes + IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem. + IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem. + IMAGE_SUBSYSTEM_OS2_CUI = 5, ///< The OS/2 character subsytem. + IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem. + IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, ///< Native Windows 9x driver. + IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE. + IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application. + IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot + /// services. + IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time + /// services. + IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image. + IMAGE_SUBSYSTEM_XBOX = 14, ///< XBOX. + IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application. +}; + +enum DLLCharacteristics { + /// ASLR with 64 bit address space. + IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020, + /// DLL can be relocated at load time. + IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040, + /// Code integrity checks are enforced. + IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080, + ///< Image is NX compatible. + IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100, + /// Isolation aware, but do not isolate the image. + IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200, + /// Does not use structured exception handling (SEH). No SEH handler may be + /// called in this image. + IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400, + /// Do not bind the image. + IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800, + ///< Image should execute in an AppContainer. + IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000, + ///< A WDM driver. + IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000, + ///< Image supports Control Flow Guard. + IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000, + /// Terminal Server aware. + IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000 +}; + +enum DebugType { + IMAGE_DEBUG_TYPE_UNKNOWN = 0, + IMAGE_DEBUG_TYPE_COFF = 1, + IMAGE_DEBUG_TYPE_CODEVIEW = 2, + IMAGE_DEBUG_TYPE_FPO = 3, + IMAGE_DEBUG_TYPE_MISC = 4, + IMAGE_DEBUG_TYPE_EXCEPTION = 5, + IMAGE_DEBUG_TYPE_FIXUP = 6, + IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7, + IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8, + IMAGE_DEBUG_TYPE_BORLAND = 9, + IMAGE_DEBUG_TYPE_RESERVED10 = 10, + IMAGE_DEBUG_TYPE_CLSID = 11, + IMAGE_DEBUG_TYPE_VC_FEATURE = 12, + IMAGE_DEBUG_TYPE_POGO = 13, + IMAGE_DEBUG_TYPE_ILTCG = 14, + IMAGE_DEBUG_TYPE_MPX = 15, + IMAGE_DEBUG_TYPE_REPRO = 16, +}; + +enum BaseRelocationType { + IMAGE_REL_BASED_ABSOLUTE = 0, + IMAGE_REL_BASED_HIGH = 1, + IMAGE_REL_BASED_LOW = 2, + IMAGE_REL_BASED_HIGHLOW = 3, + IMAGE_REL_BASED_HIGHADJ = 4, + IMAGE_REL_BASED_MIPS_JMPADDR = 5, + IMAGE_REL_BASED_ARM_MOV32A = 5, + IMAGE_REL_BASED_ARM_MOV32T = 7, + IMAGE_REL_BASED_MIPS_JMPADDR16 = 9, + IMAGE_REL_BASED_DIR64 = 10 +}; + +enum ImportType { IMPORT_CODE = 0, IMPORT_DATA = 1, IMPORT_CONST = 2 }; + +enum ImportNameType { + /// Import is by ordinal. This indicates that the value in the Ordinal/Hint + /// field of the import header is the import's ordinal. If this constant is + /// not specified, then the Ordinal/Hint field should always be interpreted + /// as the import's hint. + IMPORT_ORDINAL = 0, + /// The import name is identical to the public symbol name + IMPORT_NAME = 1, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _. + IMPORT_NAME_NOPREFIX = 2, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _, and truncating at the first @. + IMPORT_NAME_UNDECORATE = 3 +}; + +struct ImportHeader { + uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). + uint16_t Sig2; ///< Must be 0xFFFF. + uint16_t Version; + uint16_t Machine; + uint32_t TimeDateStamp; + uint32_t SizeOfData; + uint16_t OrdinalHint; + uint16_t TypeInfo; + + ImportType getType() const { return static_cast(TypeInfo & 0x3); } + + ImportNameType getNameType() const { + return static_cast((TypeInfo & 0x1C) >> 2); + } +}; + +enum CodeViewIdentifiers { + DEBUG_SECTION_MAGIC = 0x4, +}; + +inline bool isReservedSectionNumber(int32_t SectionNumber) { + return SectionNumber <= 0; +} + +} // End namespace COFF. +} // End namespace llvm. + +#endif diff --git a/include/llvm/Support/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def similarity index 100% rename from include/llvm/Support/Dwarf.def rename to include/llvm/BinaryFormat/Dwarf.def diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h similarity index 92% rename from include/llvm/Support/Dwarf.h rename to include/llvm/BinaryFormat/Dwarf.h index 3061b7b5fa0f..b7a056b18119 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/BinaryFormat/Dwarf.h @@ -1,4 +1,4 @@ -//===-- llvm/Support/Dwarf.h ---Dwarf Constants------------------*- C++ -*-===// +//===-- llvm/BinaryFormat/Dwarf.h ---Dwarf Constants-------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_DWARF_H -#define LLVM_SUPPORT_DWARF_H +#ifndef LLVM_BINARYFORMAT_DWARF_H +#define LLVM_BINARYFORMAT_DWARF_H #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" @@ -37,7 +37,7 @@ namespace dwarf { // enumeration base type. enum LLVMConstants : uint32_t { - // LLVM mock tags (see also llvm/Support/Dwarf.def). + // LLVM mock tags (see also llvm/BinaryFormat/Dwarf.def). DW_TAG_invalid = ~0U, // Tag for invalid results. DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results. DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results. @@ -48,7 +48,7 @@ enum LLVMConstants : uint32_t { DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames. DW_ARANGES_VERSION = 2, // Section version number for .debug_aranges. // Identifiers we use to distinguish vendor extensions. - DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard. + DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard. DWARF_VENDOR_APPLE = 1, DWARF_VENDOR_BORLAND = 2, DWARF_VENDOR_GNU = 3, @@ -64,7 +64,7 @@ const uint64_t DW64_CIE_ID = UINT64_MAX; enum Tag : uint16_t { #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_TAG_lo_user = 0x4080, DW_TAG_hi_user = 0xffff, DW_TAG_user_base = 0x1000 // Recommended base for user tags. @@ -101,20 +101,20 @@ inline bool isType(Tag T) { /// Attributes. enum Attribute : uint16_t { #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) DW_AT_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_AT_lo_user = 0x2000, DW_AT_hi_user = 0x3fff, }; enum Form : uint16_t { #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) DW_FORM_##NAME = ID, -#include "llvm/Support/Dwarf.def" - DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF. +#include "llvm/BinaryFormat/Dwarf.def" + DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF. }; enum LocationAtom { #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) DW_OP_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata. @@ -122,7 +122,7 @@ enum LocationAtom { enum TypeKind { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_ATE_lo_user = 0x80, DW_ATE_hi_user = 0xff }; @@ -161,19 +161,19 @@ enum VisibilityAttribute { enum VirtualityAttribute { #define HANDLE_DW_VIRTUALITY(ID, NAME) DW_VIRTUALITY_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_VIRTUALITY_max = 0x02 }; enum DefaultedMemberAttribute { #define HANDLE_DW_DEFAULTED(ID, NAME) DW_DEFAULTED_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_DEFAULTED_max = 0x02 }; enum SourceLanguage { #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_LANG_lo_user = 0x8000, DW_LANG_hi_user = 0xffff }; @@ -187,9 +187,9 @@ enum CaseSensitivity { }; enum CallingConvention { - // Calling convention codes +// Calling convention codes #define HANDLE_DW_CC(ID, NAME) DW_CC_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_CC_lo_user = 0x40, DW_CC_hi_user = 0xff }; @@ -217,20 +217,20 @@ enum DiscriminantList { /// Line Number Standard Opcode Encodings. enum LineNumberOps : uint8_t { #define HANDLE_DW_LNS(ID, NAME) DW_LNS_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" }; /// Line Number Extended Opcode Encodings. enum LineNumberExtendedOps { #define HANDLE_DW_LNE(ID, NAME) DW_LNE_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_LNE_lo_user = 0x80, DW_LNE_hi_user = 0xff }; enum LineNumberEntryFormat { #define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_LNCT_lo_user = 0x2000, DW_LNCT_hi_user = 0x3fff, }; @@ -247,7 +247,7 @@ enum MacinfoRecordType { /// DWARF v5 macro information entry type encodings. enum MacroEntryType { #define HANDLE_DW_MACRO(ID, NAME) DW_MACRO_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_MACRO_lo_user = 0xe0, DW_MACRO_hi_user = 0xff }; @@ -255,14 +255,13 @@ enum MacroEntryType { /// DWARF v5 range list entry encoding values. enum RangeListEntries { #define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" }; - /// Call frame instruction encodings. enum CallFrameInfo { #define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_CFA_extended = 0x00, DW_CFA_lo_user = 0x1c, @@ -310,13 +309,13 @@ enum LocationListEntry : unsigned char { /// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! enum ApplePropertyAttributes { #define HANDLE_DW_APPLE_PROPERTY(ID, NAME) DW_APPLE_PROPERTY_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" }; /// Constants for unit types in DWARF v5. enum UnitType : unsigned char { #define HANDLE_DW_UT(ID, NAME) DW_UT_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_UT_lo_user = 0x80, DW_UT_hi_user = 0xff }; @@ -355,10 +354,7 @@ enum GDBIndexEntryKind { GIEK_UNUSED7 }; -enum GDBIndexEntryLinkage { - GIEL_EXTERNAL, - GIEL_STATIC -}; +enum GDBIndexEntryLinkage { GIEL_EXTERNAL, GIEL_STATIC }; /// \defgroup DwarfConstantsDumping Dwarf constants dumping functions /// @@ -470,8 +466,8 @@ struct PubIndexEntryDescriptor { /* implicit */ PubIndexEntryDescriptor(GDBIndexEntryKind Kind) : Kind(Kind), Linkage(GIEL_EXTERNAL) {} explicit PubIndexEntryDescriptor(uint8_t Value) - : Kind(static_cast((Value & KIND_MASK) >> - KIND_OFFSET)), + : Kind( + static_cast((Value & KIND_MASK) >> KIND_OFFSET)), Linkage(static_cast((Value & LINKAGE_MASK) >> LINKAGE_OFFSET)) {} uint8_t toBits() const { diff --git a/include/llvm/Support/ELF.h b/include/llvm/BinaryFormat/ELF.h similarity index 98% rename from include/llvm/Support/ELF.h rename to include/llvm/BinaryFormat/ELF.h index 33f20a809d6c..3724f555c283 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -1,4 +1,4 @@ -//===-- llvm/Support/ELF.h - ELF constants and data structures --*- C++ -*-===// +//===-- llvm/BinaryFormat/ELF.h - ELF constants and structures --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_ELF_H -#define LLVM_SUPPORT_ELF_H +#ifndef LLVM_BINARYFORMAT_ELF_H +#define LLVM_BINARYFORMAT_ELF_H #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" @@ -808,12 +808,7 @@ enum : unsigned { SHF_MIPS_STRING = 0x80000000, // Make code section unreadable when in execute-only mode - SHF_ARM_PURECODE = 0x20000000, - - SHF_AMDGPU_HSA_GLOBAL = 0x00100000, - SHF_AMDGPU_HSA_READONLY = 0x00200000, - SHF_AMDGPU_HSA_CODE = 0x00400000, - SHF_AMDGPU_HSA_AGENT = 0x00800000 + SHF_ARM_PURECODE = 0x20000000 }; // Section Group Flags @@ -897,9 +892,7 @@ enum { STT_HIPROC = 15, // Highest processor-specific symbol type // AMDGPU symbol types - STT_AMDGPU_HSA_KERNEL = 10, - STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11, - STT_AMDGPU_HSA_METADATA = 12 + STT_AMDGPU_HSA_KERNEL = 10 }; enum { @@ -1050,12 +1043,6 @@ enum { PT_MIPS_OPTIONS = 0x70000002, // Options segment. PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment. - // AMDGPU program header types. - PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000, - PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001, - PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002, - PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003, - // WebAssembly program header types. PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions. }; diff --git a/include/llvm/Support/ELFRelocs/AArch64.def b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def similarity index 100% rename from include/llvm/Support/ELFRelocs/AArch64.def rename to include/llvm/BinaryFormat/ELFRelocs/AArch64.def diff --git a/include/llvm/Support/ELFRelocs/AMDGPU.def b/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def similarity index 100% rename from include/llvm/Support/ELFRelocs/AMDGPU.def rename to include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def diff --git a/include/llvm/Support/ELFRelocs/ARM.def b/include/llvm/BinaryFormat/ELFRelocs/ARM.def similarity index 100% rename from include/llvm/Support/ELFRelocs/ARM.def rename to include/llvm/BinaryFormat/ELFRelocs/ARM.def diff --git a/include/llvm/Support/ELFRelocs/AVR.def b/include/llvm/BinaryFormat/ELFRelocs/AVR.def similarity index 100% rename from include/llvm/Support/ELFRelocs/AVR.def rename to include/llvm/BinaryFormat/ELFRelocs/AVR.def diff --git a/include/llvm/Support/ELFRelocs/BPF.def b/include/llvm/BinaryFormat/ELFRelocs/BPF.def similarity index 100% rename from include/llvm/Support/ELFRelocs/BPF.def rename to include/llvm/BinaryFormat/ELFRelocs/BPF.def diff --git a/include/llvm/Support/ELFRelocs/Hexagon.def b/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def similarity index 100% rename from include/llvm/Support/ELFRelocs/Hexagon.def rename to include/llvm/BinaryFormat/ELFRelocs/Hexagon.def diff --git a/include/llvm/Support/ELFRelocs/Lanai.def b/include/llvm/BinaryFormat/ELFRelocs/Lanai.def similarity index 100% rename from include/llvm/Support/ELFRelocs/Lanai.def rename to include/llvm/BinaryFormat/ELFRelocs/Lanai.def diff --git a/include/llvm/Support/ELFRelocs/Mips.def b/include/llvm/BinaryFormat/ELFRelocs/Mips.def similarity index 100% rename from include/llvm/Support/ELFRelocs/Mips.def rename to include/llvm/BinaryFormat/ELFRelocs/Mips.def diff --git a/include/llvm/Support/ELFRelocs/PowerPC.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def similarity index 100% rename from include/llvm/Support/ELFRelocs/PowerPC.def rename to include/llvm/BinaryFormat/ELFRelocs/PowerPC.def diff --git a/include/llvm/Support/ELFRelocs/PowerPC64.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def similarity index 100% rename from include/llvm/Support/ELFRelocs/PowerPC64.def rename to include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def diff --git a/include/llvm/Support/ELFRelocs/RISCV.def b/include/llvm/BinaryFormat/ELFRelocs/RISCV.def similarity index 100% rename from include/llvm/Support/ELFRelocs/RISCV.def rename to include/llvm/BinaryFormat/ELFRelocs/RISCV.def diff --git a/include/llvm/Support/ELFRelocs/Sparc.def b/include/llvm/BinaryFormat/ELFRelocs/Sparc.def similarity index 100% rename from include/llvm/Support/ELFRelocs/Sparc.def rename to include/llvm/BinaryFormat/ELFRelocs/Sparc.def diff --git a/include/llvm/Support/ELFRelocs/SystemZ.def b/include/llvm/BinaryFormat/ELFRelocs/SystemZ.def similarity index 100% rename from include/llvm/Support/ELFRelocs/SystemZ.def rename to include/llvm/BinaryFormat/ELFRelocs/SystemZ.def diff --git a/include/llvm/Support/ELFRelocs/WebAssembly.def b/include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def similarity index 100% rename from include/llvm/Support/ELFRelocs/WebAssembly.def rename to include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def diff --git a/include/llvm/Support/ELFRelocs/i386.def b/include/llvm/BinaryFormat/ELFRelocs/i386.def similarity index 100% rename from include/llvm/Support/ELFRelocs/i386.def rename to include/llvm/BinaryFormat/ELFRelocs/i386.def diff --git a/include/llvm/Support/ELFRelocs/x86_64.def b/include/llvm/BinaryFormat/ELFRelocs/x86_64.def similarity index 100% rename from include/llvm/Support/ELFRelocs/x86_64.def rename to include/llvm/BinaryFormat/ELFRelocs/x86_64.def diff --git a/include/llvm/Support/MachO.def b/include/llvm/BinaryFormat/MachO.def similarity index 100% rename from include/llvm/Support/MachO.def rename to include/llvm/BinaryFormat/MachO.def diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h new file mode 100644 index 000000000000..8ab6dde800c2 --- /dev/null +++ b/include/llvm/BinaryFormat/MachO.h @@ -0,0 +1,1984 @@ +//===-- llvm/BinaryFormat/MachO.h - The MachO file format -------*- C++/-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines manifest constants for the MachO object file format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_MACHO_H +#define LLVM_BINARYFORMAT_MACHO_H + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Host.h" + +namespace llvm { +namespace MachO { +// Enums from +enum : uint32_t { + // Constants for the "magic" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_MAGIC = 0xFEEDFACEu, + MH_CIGAM = 0xCEFAEDFEu, + MH_MAGIC_64 = 0xFEEDFACFu, + MH_CIGAM_64 = 0xCFFAEDFEu, + FAT_MAGIC = 0xCAFEBABEu, + FAT_CIGAM = 0xBEBAFECAu, + FAT_MAGIC_64 = 0xCAFEBABFu, + FAT_CIGAM_64 = 0xBFBAFECAu +}; + +enum HeaderFileType { + // Constants for the "filetype" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_OBJECT = 0x1u, + MH_EXECUTE = 0x2u, + MH_FVMLIB = 0x3u, + MH_CORE = 0x4u, + MH_PRELOAD = 0x5u, + MH_DYLIB = 0x6u, + MH_DYLINKER = 0x7u, + MH_BUNDLE = 0x8u, + MH_DYLIB_STUB = 0x9u, + MH_DSYM = 0xAu, + MH_KEXT_BUNDLE = 0xBu +}; + +enum { + // Constant bits for the "flags" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_NOUNDEFS = 0x00000001u, + MH_INCRLINK = 0x00000002u, + MH_DYLDLINK = 0x00000004u, + MH_BINDATLOAD = 0x00000008u, + MH_PREBOUND = 0x00000010u, + MH_SPLIT_SEGS = 0x00000020u, + MH_LAZY_INIT = 0x00000040u, + MH_TWOLEVEL = 0x00000080u, + MH_FORCE_FLAT = 0x00000100u, + MH_NOMULTIDEFS = 0x00000200u, + MH_NOFIXPREBINDING = 0x00000400u, + MH_PREBINDABLE = 0x00000800u, + MH_ALLMODSBOUND = 0x00001000u, + MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000u, + MH_CANONICAL = 0x00004000u, + MH_WEAK_DEFINES = 0x00008000u, + MH_BINDS_TO_WEAK = 0x00010000u, + MH_ALLOW_STACK_EXECUTION = 0x00020000u, + MH_ROOT_SAFE = 0x00040000u, + MH_SETUID_SAFE = 0x00080000u, + MH_NO_REEXPORTED_DYLIBS = 0x00100000u, + MH_PIE = 0x00200000u, + MH_DEAD_STRIPPABLE_DYLIB = 0x00400000u, + MH_HAS_TLV_DESCRIPTORS = 0x00800000u, + MH_NO_HEAP_EXECUTION = 0x01000000u, + MH_APP_EXTENSION_SAFE = 0x02000000u +}; + +enum : uint32_t { + // Flags for the "cmd" field in llvm::MachO::load_command + LC_REQ_DYLD = 0x80000000u +}; + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) LCName = LCValue, + +enum LoadCommandType : uint32_t { +#include "llvm/BinaryFormat/MachO.def" +}; + +#undef HANDLE_LOAD_COMMAND + +enum : uint32_t { + // Constant bits for the "flags" field in llvm::MachO::segment_command + SG_HIGHVM = 0x1u, + SG_FVMLIB = 0x2u, + SG_NORELOC = 0x4u, + SG_PROTECTED_VERSION_1 = 0x8u, + + // Constant masks for the "flags" field in llvm::MachO::section and + // llvm::MachO::section_64 + SECTION_TYPE = 0x000000ffu, // SECTION_TYPE + SECTION_ATTRIBUTES = 0xffffff00u, // SECTION_ATTRIBUTES + SECTION_ATTRIBUTES_USR = 0xff000000u, // SECTION_ATTRIBUTES_USR + SECTION_ATTRIBUTES_SYS = 0x00ffff00u // SECTION_ATTRIBUTES_SYS +}; + +/// These are the section type and attributes fields. A MachO section can +/// have only one Type, but can have any of the attributes specified. +enum SectionType : uint32_t { + // Constant masks for the "flags[7:0]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE) + + /// S_REGULAR - Regular section. + S_REGULAR = 0x00u, + /// S_ZEROFILL - Zero fill on demand section. + S_ZEROFILL = 0x01u, + /// S_CSTRING_LITERALS - Section with literal C strings. + S_CSTRING_LITERALS = 0x02u, + /// S_4BYTE_LITERALS - Section with 4 byte literals. + S_4BYTE_LITERALS = 0x03u, + /// S_8BYTE_LITERALS - Section with 8 byte literals. + S_8BYTE_LITERALS = 0x04u, + /// S_LITERAL_POINTERS - Section with pointers to literals. + S_LITERAL_POINTERS = 0x05u, + /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers. + S_NON_LAZY_SYMBOL_POINTERS = 0x06u, + /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers. + S_LAZY_SYMBOL_POINTERS = 0x07u, + /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in + /// the Reserved2 field. + S_SYMBOL_STUBS = 0x08u, + /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for + /// initialization. + S_MOD_INIT_FUNC_POINTERS = 0x09u, + /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for + /// termination. + S_MOD_TERM_FUNC_POINTERS = 0x0au, + /// S_COALESCED - Section contains symbols that are to be coalesced. + S_COALESCED = 0x0bu, + /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4 + /// gigabytes). + S_GB_ZEROFILL = 0x0cu, + /// S_INTERPOSING - Section with only pairs of function pointers for + /// interposing. + S_INTERPOSING = 0x0du, + /// S_16BYTE_LITERALS - Section with only 16 byte literals. + S_16BYTE_LITERALS = 0x0eu, + /// S_DTRACE_DOF - Section contains DTrace Object Format. + S_DTRACE_DOF = 0x0fu, + /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to + /// lazy loaded dylibs. + S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10u, + /// S_THREAD_LOCAL_REGULAR - Thread local data section. + S_THREAD_LOCAL_REGULAR = 0x11u, + /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section. + S_THREAD_LOCAL_ZEROFILL = 0x12u, + /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable + /// structure data. + S_THREAD_LOCAL_VARIABLES = 0x13u, + /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with pointers to thread + /// local structures. + S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14u, + /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local + /// variable initialization pointers to functions. + S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u, + + LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS +}; + +enum : uint32_t { + // Constant masks for the "flags[31:24]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR) + + /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine + /// instructions. + S_ATTR_PURE_INSTRUCTIONS = 0x80000000u, + /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be + /// in a ranlib table of contents. + S_ATTR_NO_TOC = 0x40000000u, + /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section + /// in files with the MY_DYLDLINK flag. + S_ATTR_STRIP_STATIC_SYMS = 0x20000000u, + /// S_ATTR_NO_DEAD_STRIP - No dead stripping. + S_ATTR_NO_DEAD_STRIP = 0x10000000u, + /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks. + S_ATTR_LIVE_SUPPORT = 0x08000000u, + /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by + /// dyld. + S_ATTR_SELF_MODIFYING_CODE = 0x04000000u, + /// S_ATTR_DEBUG - A debug section. + S_ATTR_DEBUG = 0x02000000u, + + // Constant masks for the "flags[23:8]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS) + + /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions. + S_ATTR_SOME_INSTRUCTIONS = 0x00000400u, + /// S_ATTR_EXT_RELOC - Section has external relocation entries. + S_ATTR_EXT_RELOC = 0x00000200u, + /// S_ATTR_LOC_RELOC - Section has local relocation entries. + S_ATTR_LOC_RELOC = 0x00000100u, + + // Constant masks for the value of an indirect symbol in an indirect + // symbol table + INDIRECT_SYMBOL_LOCAL = 0x80000000u, + INDIRECT_SYMBOL_ABS = 0x40000000u +}; + +enum DataRegionType { + // Constants for the "kind" field in a data_in_code_entry structure + DICE_KIND_DATA = 1u, + DICE_KIND_JUMP_TABLE8 = 2u, + DICE_KIND_JUMP_TABLE16 = 3u, + DICE_KIND_JUMP_TABLE32 = 4u, + DICE_KIND_ABS_JUMP_TABLE32 = 5u +}; + +enum RebaseType { + REBASE_TYPE_POINTER = 1u, + REBASE_TYPE_TEXT_ABSOLUTE32 = 2u, + REBASE_TYPE_TEXT_PCREL32 = 3u +}; + +enum { REBASE_OPCODE_MASK = 0xF0u, REBASE_IMMEDIATE_MASK = 0x0Fu }; + +enum RebaseOpcode { + REBASE_OPCODE_DONE = 0x00u, + REBASE_OPCODE_SET_TYPE_IMM = 0x10u, + REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x20u, + REBASE_OPCODE_ADD_ADDR_ULEB = 0x30u, + REBASE_OPCODE_ADD_ADDR_IMM_SCALED = 0x40u, + REBASE_OPCODE_DO_REBASE_IMM_TIMES = 0x50u, + REBASE_OPCODE_DO_REBASE_ULEB_TIMES = 0x60u, + REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB = 0x70u, + REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80u +}; + +enum BindType { + BIND_TYPE_POINTER = 1u, + BIND_TYPE_TEXT_ABSOLUTE32 = 2u, + BIND_TYPE_TEXT_PCREL32 = 3u +}; + +enum BindSpecialDylib { + BIND_SPECIAL_DYLIB_SELF = 0, + BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1, + BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2 +}; + +enum { + BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x1u, + BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8u, + + BIND_OPCODE_MASK = 0xF0u, + BIND_IMMEDIATE_MASK = 0x0Fu +}; + +enum BindOpcode { + BIND_OPCODE_DONE = 0x00u, + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10u, + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20u, + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30u, + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40u, + BIND_OPCODE_SET_TYPE_IMM = 0x50u, + BIND_OPCODE_SET_ADDEND_SLEB = 0x60u, + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70u, + BIND_OPCODE_ADD_ADDR_ULEB = 0x80u, + BIND_OPCODE_DO_BIND = 0x90u, + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0u, + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0u, + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0u +}; + +enum { + EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u, + EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u, + EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u, + EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u +}; + +enum ExportSymbolKind { + EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u, + EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u, + EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u +}; + +enum { + // Constant masks for the "n_type" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + N_STAB = 0xe0, + N_PEXT = 0x10, + N_TYPE = 0x0e, + N_EXT = 0x01 +}; + +enum NListType : uint8_t { + // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and + // llvm::MachO::nlist_64 + N_UNDF = 0x0u, + N_ABS = 0x2u, + N_SECT = 0xeu, + N_PBUD = 0xcu, + N_INDR = 0xau +}; + +enum SectionOrdinal { + // Constants for the "n_sect" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + NO_SECT = 0u, + MAX_SECT = 0xffu +}; + +enum { + // Constant masks for the "n_desc" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + // The low 3 bits are the for the REFERENCE_TYPE. + REFERENCE_TYPE = 0x7, + REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, + REFERENCE_FLAG_UNDEFINED_LAZY = 1, + REFERENCE_FLAG_DEFINED = 2, + REFERENCE_FLAG_PRIVATE_DEFINED = 3, + REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, + REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5, + // Flag bits (some overlap with the library ordinal bits). + N_ARM_THUMB_DEF = 0x0008u, + REFERENCED_DYNAMICALLY = 0x0010u, + N_NO_DEAD_STRIP = 0x0020u, + N_WEAK_REF = 0x0040u, + N_WEAK_DEF = 0x0080u, + N_SYMBOL_RESOLVER = 0x0100u, + N_ALT_ENTRY = 0x0200u, + // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL() + // as these are in the top 8 bits. + SELF_LIBRARY_ORDINAL = 0x0, + MAX_LIBRARY_ORDINAL = 0xfd, + DYNAMIC_LOOKUP_ORDINAL = 0xfe, + EXECUTABLE_ORDINAL = 0xff +}; + +enum StabType { + // Constant values for the "n_type" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 when "(n_type & N_STAB) != 0" + N_GSYM = 0x20u, + N_FNAME = 0x22u, + N_FUN = 0x24u, + N_STSYM = 0x26u, + N_LCSYM = 0x28u, + N_BNSYM = 0x2Eu, + N_PC = 0x30u, + N_AST = 0x32u, + N_OPT = 0x3Cu, + N_RSYM = 0x40u, + N_SLINE = 0x44u, + N_ENSYM = 0x4Eu, + N_SSYM = 0x60u, + N_SO = 0x64u, + N_OSO = 0x66u, + N_LSYM = 0x80u, + N_BINCL = 0x82u, + N_SOL = 0x84u, + N_PARAMS = 0x86u, + N_VERSION = 0x88u, + N_OLEVEL = 0x8Au, + N_PSYM = 0xA0u, + N_EINCL = 0xA2u, + N_ENTRY = 0xA4u, + N_LBRAC = 0xC0u, + N_EXCL = 0xC2u, + N_RBRAC = 0xE0u, + N_BCOMM = 0xE2u, + N_ECOMM = 0xE4u, + N_ECOML = 0xE8u, + N_LENG = 0xFEu +}; + +enum : uint32_t { + // Constant values for the r_symbolnum field in an + // llvm::MachO::relocation_info structure when r_extern is 0. + R_ABS = 0, + + // Constant bits for the r_address field in an + // llvm::MachO::relocation_info structure. + R_SCATTERED = 0x80000000 +}; + +enum RelocationInfoType { + // Constant values for the r_type field in an + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + GENERIC_RELOC_VANILLA = 0, + GENERIC_RELOC_PAIR = 1, + GENERIC_RELOC_SECTDIFF = 2, + GENERIC_RELOC_PB_LA_PTR = 3, + GENERIC_RELOC_LOCAL_SECTDIFF = 4, + GENERIC_RELOC_TLV = 5, + + // Constant values for the r_type field in a PowerPC architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + PPC_RELOC_VANILLA = GENERIC_RELOC_VANILLA, + PPC_RELOC_PAIR = GENERIC_RELOC_PAIR, + PPC_RELOC_BR14 = 2, + PPC_RELOC_BR24 = 3, + PPC_RELOC_HI16 = 4, + PPC_RELOC_LO16 = 5, + PPC_RELOC_HA16 = 6, + PPC_RELOC_LO14 = 7, + PPC_RELOC_SECTDIFF = 8, + PPC_RELOC_PB_LA_PTR = 9, + PPC_RELOC_HI16_SECTDIFF = 10, + PPC_RELOC_LO16_SECTDIFF = 11, + PPC_RELOC_HA16_SECTDIFF = 12, + PPC_RELOC_JBSR = 13, + PPC_RELOC_LO14_SECTDIFF = 14, + PPC_RELOC_LOCAL_SECTDIFF = 15, + + // Constant values for the r_type field in an ARM architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + ARM_RELOC_VANILLA = GENERIC_RELOC_VANILLA, + ARM_RELOC_PAIR = GENERIC_RELOC_PAIR, + ARM_RELOC_SECTDIFF = GENERIC_RELOC_SECTDIFF, + ARM_RELOC_LOCAL_SECTDIFF = 3, + ARM_RELOC_PB_LA_PTR = 4, + ARM_RELOC_BR24 = 5, + ARM_THUMB_RELOC_BR22 = 6, + ARM_THUMB_32BIT_BRANCH = 7, // obsolete + ARM_RELOC_HALF = 8, + ARM_RELOC_HALF_SECTDIFF = 9, + + // Constant values for the r_type field in an ARM64 architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + + // For pointers. + ARM64_RELOC_UNSIGNED = 0, + // Must be followed by an ARM64_RELOC_UNSIGNED + ARM64_RELOC_SUBTRACTOR = 1, + // A B/BL instruction with 26-bit displacement. + ARM64_RELOC_BRANCH26 = 2, + // PC-rel distance to page of target. + ARM64_RELOC_PAGE21 = 3, + // Offset within page, scaled by r_length. + ARM64_RELOC_PAGEOFF12 = 4, + // PC-rel distance to page of GOT slot. + ARM64_RELOC_GOT_LOAD_PAGE21 = 5, + // Offset within page of GOT slot, scaled by r_length. + ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6, + // For pointers to GOT slots. + ARM64_RELOC_POINTER_TO_GOT = 7, + // PC-rel distance to page of TLVP slot. + ARM64_RELOC_TLVP_LOAD_PAGE21 = 8, + // Offset within page of TLVP slot, scaled by r_length. + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9, + // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. + ARM64_RELOC_ADDEND = 10, + + // Constant values for the r_type field in an x86_64 architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure + X86_64_RELOC_UNSIGNED = 0, + X86_64_RELOC_SIGNED = 1, + X86_64_RELOC_BRANCH = 2, + X86_64_RELOC_GOT_LOAD = 3, + X86_64_RELOC_GOT = 4, + X86_64_RELOC_SUBTRACTOR = 5, + X86_64_RELOC_SIGNED_1 = 6, + X86_64_RELOC_SIGNED_2 = 7, + X86_64_RELOC_SIGNED_4 = 8, + X86_64_RELOC_TLV = 9 +}; + +// Values for segment_command.initprot. +// From +enum { VM_PROT_READ = 0x1, VM_PROT_WRITE = 0x2, VM_PROT_EXECUTE = 0x4 }; + +// Values for platform field in build_version_command. +enum { + PLATFORM_MACOS = 1, + PLATFORM_IOS = 2, + PLATFORM_TVOS = 3, + PLATFORM_WATCHOS = 4, + PLATFORM_BRIDGEOS = 5 +}; + +// Values for tools enum in build_tool_version. +enum { TOOL_CLANG = 1, TOOL_SWIFT = 2, TOOL_LD = 3 }; + +// Structs from + +struct mach_header { + uint32_t magic; + uint32_t cputype; + uint32_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; +}; + +struct mach_header_64 { + uint32_t magic; + uint32_t cputype; + uint32_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; + uint32_t reserved; +}; + +struct load_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +struct segment_command { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint32_t vmaddr; + uint32_t vmsize; + uint32_t fileoff; + uint32_t filesize; + uint32_t maxprot; + uint32_t initprot; + uint32_t nsects; + uint32_t flags; +}; + +struct segment_command_64 { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint64_t vmaddr; + uint64_t vmsize; + uint64_t fileoff; + uint64_t filesize; + uint32_t maxprot; + uint32_t initprot; + uint32_t nsects; + uint32_t flags; +}; + +struct section { + char sectname[16]; + char segname[16]; + uint32_t addr; + uint32_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; +}; + +struct section_64 { + char sectname[16]; + char segname[16]; + uint64_t addr; + uint64_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; +}; + +struct fvmlib { + uint32_t name; + uint32_t minor_version; + uint32_t header_addr; +}; + +// The fvmlib_command is obsolete and no longer supported. +struct fvmlib_command { + uint32_t cmd; + uint32_t cmdsize; + struct fvmlib fvmlib; +}; + +struct dylib { + uint32_t name; + uint32_t timestamp; + uint32_t current_version; + uint32_t compatibility_version; +}; + +struct dylib_command { + uint32_t cmd; + uint32_t cmdsize; + struct dylib dylib; +}; + +struct sub_framework_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t umbrella; +}; + +struct sub_client_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t client; +}; + +struct sub_umbrella_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t sub_umbrella; +}; + +struct sub_library_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t sub_library; +}; + +// The prebound_dylib_command is obsolete and no longer supported. +struct prebound_dylib_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; + uint32_t nmodules; + uint32_t linked_modules; +}; + +struct dylinker_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; +}; + +struct thread_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +struct routines_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t init_address; + uint32_t init_module; + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + uint32_t reserved6; +}; + +struct routines_command_64 { + uint32_t cmd; + uint32_t cmdsize; + uint64_t init_address; + uint64_t init_module; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t reserved5; + uint64_t reserved6; +}; + +struct symtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t symoff; + uint32_t nsyms; + uint32_t stroff; + uint32_t strsize; +}; + +struct dysymtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t iundefsym; + uint32_t nundefsym; + uint32_t tocoff; + uint32_t ntoc; + uint32_t modtaboff; + uint32_t nmodtab; + uint32_t extrefsymoff; + uint32_t nextrefsyms; + uint32_t indirectsymoff; + uint32_t nindirectsyms; + uint32_t extreloff; + uint32_t nextrel; + uint32_t locreloff; + uint32_t nlocrel; +}; + +struct dylib_table_of_contents { + uint32_t symbol_index; + uint32_t module_index; +}; + +struct dylib_module { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_addr; + uint32_t objc_module_info_size; +}; + +struct dylib_module_64 { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_size; + uint64_t objc_module_info_addr; +}; + +struct dylib_reference { + uint32_t isym : 24, flags : 8; +}; + +// The twolevel_hints_command is obsolete and no longer supported. +struct twolevel_hints_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t nhints; +}; + +// The twolevel_hints_command is obsolete and no longer supported. +struct twolevel_hint { + uint32_t isub_image : 8, itoc : 24; +}; + +// The prebind_cksum_command is obsolete and no longer supported. +struct prebind_cksum_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cksum; +}; + +struct uuid_command { + uint32_t cmd; + uint32_t cmdsize; + uint8_t uuid[16]; +}; + +struct rpath_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t path; +}; + +struct linkedit_data_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t dataoff; + uint32_t datasize; +}; + +struct data_in_code_entry { + uint32_t offset; + uint16_t length; + uint16_t kind; +}; + +struct source_version_command { + uint32_t cmd; + uint32_t cmdsize; + uint64_t version; +}; + +struct encryption_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cryptoff; + uint32_t cryptsize; + uint32_t cryptid; +}; + +struct encryption_info_command_64 { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cryptoff; + uint32_t cryptsize; + uint32_t cryptid; + uint32_t pad; +}; + +struct version_min_command { + uint32_t cmd; // LC_VERSION_MIN_MACOSX or + // LC_VERSION_MIN_IPHONEOS + uint32_t cmdsize; // sizeof(struct version_min_command) + uint32_t version; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz +}; + +struct note_command { + uint32_t cmd; // LC_NOTE + uint32_t cmdsize; // sizeof(struct note_command) + char data_owner[16]; // owner name for this LC_NOTE + uint64_t offset; // file offset of this data + uint64_t size; // length of data region +}; + +struct build_tool_version { + uint32_t tool; // enum for the tool + uint32_t version; // version of the tool +}; + +struct build_version_command { + uint32_t cmd; // LC_BUILD_VERSION + uint32_t cmdsize; // sizeof(struct build_version_command) + + // ntools * sizeof(struct build_tool_version) + uint32_t platform; // platform + uint32_t minos; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t ntools; // number of tool entries following this +}; + +struct dyld_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t rebase_off; + uint32_t rebase_size; + uint32_t bind_off; + uint32_t bind_size; + uint32_t weak_bind_off; + uint32_t weak_bind_size; + uint32_t lazy_bind_off; + uint32_t lazy_bind_size; + uint32_t export_off; + uint32_t export_size; +}; + +struct linker_option_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t count; +}; + +// The symseg_command is obsolete and no longer supported. +struct symseg_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t size; +}; + +// The ident_command is obsolete and no longer supported. +struct ident_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +// The fvmfile_command is obsolete and no longer supported. +struct fvmfile_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; + uint32_t header_addr; +}; + +struct tlv_descriptor_32 { + uint32_t thunk; + uint32_t key; + uint32_t offset; +}; + +struct tlv_descriptor_64 { + uint64_t thunk; + uint64_t key; + uint64_t offset; +}; + +struct tlv_descriptor { + uintptr_t thunk; + uintptr_t key; + uintptr_t offset; +}; + +struct entry_point_command { + uint32_t cmd; + uint32_t cmdsize; + uint64_t entryoff; + uint64_t stacksize; +}; + +// Structs from +struct fat_header { + uint32_t magic; + uint32_t nfat_arch; +}; + +struct fat_arch { + uint32_t cputype; + uint32_t cpusubtype; + uint32_t offset; + uint32_t size; + uint32_t align; +}; + +struct fat_arch_64 { + uint32_t cputype; + uint32_t cpusubtype; + uint64_t offset; + uint64_t size; + uint32_t align; + uint32_t reserved; +}; + +// Structs from +struct relocation_info { + int32_t r_address; + uint32_t r_symbolnum : 24, r_pcrel : 1, r_length : 2, r_extern : 1, + r_type : 4; +}; + +struct scattered_relocation_info { +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) + uint32_t r_scattered : 1, r_pcrel : 1, r_length : 2, r_type : 4, + r_address : 24; +#else + uint32_t r_address : 24, r_type : 4, r_length : 2, r_pcrel : 1, + r_scattered : 1; +#endif + int32_t r_value; +}; + +// Structs NOT from , but that make LLVM's life easier +struct any_relocation_info { + uint32_t r_word0, r_word1; +}; + +// Structs from +struct nlist_base { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; +}; + +struct nlist { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + int16_t n_desc; + uint32_t n_value; +}; + +struct nlist_64 { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; + uint64_t n_value; +}; + +// Byte order swapping functions for MachO structs + +inline void swapStruct(fat_header &mh) { + sys::swapByteOrder(mh.magic); + sys::swapByteOrder(mh.nfat_arch); +} + +inline void swapStruct(fat_arch &mh) { + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.offset); + sys::swapByteOrder(mh.size); + sys::swapByteOrder(mh.align); +} + +inline void swapStruct(fat_arch_64 &mh) { + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.offset); + sys::swapByteOrder(mh.size); + sys::swapByteOrder(mh.align); + sys::swapByteOrder(mh.reserved); +} + +inline void swapStruct(mach_header &mh) { + sys::swapByteOrder(mh.magic); + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.filetype); + sys::swapByteOrder(mh.ncmds); + sys::swapByteOrder(mh.sizeofcmds); + sys::swapByteOrder(mh.flags); +} + +inline void swapStruct(mach_header_64 &H) { + sys::swapByteOrder(H.magic); + sys::swapByteOrder(H.cputype); + sys::swapByteOrder(H.cpusubtype); + sys::swapByteOrder(H.filetype); + sys::swapByteOrder(H.ncmds); + sys::swapByteOrder(H.sizeofcmds); + sys::swapByteOrder(H.flags); + sys::swapByteOrder(H.reserved); +} + +inline void swapStruct(load_command &lc) { + sys::swapByteOrder(lc.cmd); + sys::swapByteOrder(lc.cmdsize); +} + +inline void swapStruct(symtab_command &lc) { + sys::swapByteOrder(lc.cmd); + sys::swapByteOrder(lc.cmdsize); + sys::swapByteOrder(lc.symoff); + sys::swapByteOrder(lc.nsyms); + sys::swapByteOrder(lc.stroff); + sys::swapByteOrder(lc.strsize); +} + +inline void swapStruct(segment_command_64 &seg) { + sys::swapByteOrder(seg.cmd); + sys::swapByteOrder(seg.cmdsize); + sys::swapByteOrder(seg.vmaddr); + sys::swapByteOrder(seg.vmsize); + sys::swapByteOrder(seg.fileoff); + sys::swapByteOrder(seg.filesize); + sys::swapByteOrder(seg.maxprot); + sys::swapByteOrder(seg.initprot); + sys::swapByteOrder(seg.nsects); + sys::swapByteOrder(seg.flags); +} + +inline void swapStruct(segment_command &seg) { + sys::swapByteOrder(seg.cmd); + sys::swapByteOrder(seg.cmdsize); + sys::swapByteOrder(seg.vmaddr); + sys::swapByteOrder(seg.vmsize); + sys::swapByteOrder(seg.fileoff); + sys::swapByteOrder(seg.filesize); + sys::swapByteOrder(seg.maxprot); + sys::swapByteOrder(seg.initprot); + sys::swapByteOrder(seg.nsects); + sys::swapByteOrder(seg.flags); +} + +inline void swapStruct(section_64 §) { + sys::swapByteOrder(sect.addr); + sys::swapByteOrder(sect.size); + sys::swapByteOrder(sect.offset); + sys::swapByteOrder(sect.align); + sys::swapByteOrder(sect.reloff); + sys::swapByteOrder(sect.nreloc); + sys::swapByteOrder(sect.flags); + sys::swapByteOrder(sect.reserved1); + sys::swapByteOrder(sect.reserved2); +} + +inline void swapStruct(section §) { + sys::swapByteOrder(sect.addr); + sys::swapByteOrder(sect.size); + sys::swapByteOrder(sect.offset); + sys::swapByteOrder(sect.align); + sys::swapByteOrder(sect.reloff); + sys::swapByteOrder(sect.nreloc); + sys::swapByteOrder(sect.flags); + sys::swapByteOrder(sect.reserved1); + sys::swapByteOrder(sect.reserved2); +} + +inline void swapStruct(dyld_info_command &info) { + sys::swapByteOrder(info.cmd); + sys::swapByteOrder(info.cmdsize); + sys::swapByteOrder(info.rebase_off); + sys::swapByteOrder(info.rebase_size); + sys::swapByteOrder(info.bind_off); + sys::swapByteOrder(info.bind_size); + sys::swapByteOrder(info.weak_bind_off); + sys::swapByteOrder(info.weak_bind_size); + sys::swapByteOrder(info.lazy_bind_off); + sys::swapByteOrder(info.lazy_bind_size); + sys::swapByteOrder(info.export_off); + sys::swapByteOrder(info.export_size); +} + +inline void swapStruct(dylib_command &d) { + sys::swapByteOrder(d.cmd); + sys::swapByteOrder(d.cmdsize); + sys::swapByteOrder(d.dylib.name); + sys::swapByteOrder(d.dylib.timestamp); + sys::swapByteOrder(d.dylib.current_version); + sys::swapByteOrder(d.dylib.compatibility_version); +} + +inline void swapStruct(sub_framework_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.umbrella); +} + +inline void swapStruct(sub_umbrella_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.sub_umbrella); +} + +inline void swapStruct(sub_library_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.sub_library); +} + +inline void swapStruct(sub_client_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.client); +} + +inline void swapStruct(routines_command &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.init_address); + sys::swapByteOrder(r.init_module); + sys::swapByteOrder(r.reserved1); + sys::swapByteOrder(r.reserved2); + sys::swapByteOrder(r.reserved3); + sys::swapByteOrder(r.reserved4); + sys::swapByteOrder(r.reserved5); + sys::swapByteOrder(r.reserved6); +} + +inline void swapStruct(routines_command_64 &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.init_address); + sys::swapByteOrder(r.init_module); + sys::swapByteOrder(r.reserved1); + sys::swapByteOrder(r.reserved2); + sys::swapByteOrder(r.reserved3); + sys::swapByteOrder(r.reserved4); + sys::swapByteOrder(r.reserved5); + sys::swapByteOrder(r.reserved6); +} + +inline void swapStruct(thread_command &t) { + sys::swapByteOrder(t.cmd); + sys::swapByteOrder(t.cmdsize); +} + +inline void swapStruct(dylinker_command &d) { + sys::swapByteOrder(d.cmd); + sys::swapByteOrder(d.cmdsize); + sys::swapByteOrder(d.name); +} + +inline void swapStruct(uuid_command &u) { + sys::swapByteOrder(u.cmd); + sys::swapByteOrder(u.cmdsize); +} + +inline void swapStruct(rpath_command &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.path); +} + +inline void swapStruct(source_version_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.version); +} + +inline void swapStruct(entry_point_command &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.entryoff); + sys::swapByteOrder(e.stacksize); +} + +inline void swapStruct(encryption_info_command &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.cryptoff); + sys::swapByteOrder(e.cryptsize); + sys::swapByteOrder(e.cryptid); +} + +inline void swapStruct(encryption_info_command_64 &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.cryptoff); + sys::swapByteOrder(e.cryptsize); + sys::swapByteOrder(e.cryptid); + sys::swapByteOrder(e.pad); +} + +inline void swapStruct(dysymtab_command &dst) { + sys::swapByteOrder(dst.cmd); + sys::swapByteOrder(dst.cmdsize); + sys::swapByteOrder(dst.ilocalsym); + sys::swapByteOrder(dst.nlocalsym); + sys::swapByteOrder(dst.iextdefsym); + sys::swapByteOrder(dst.nextdefsym); + sys::swapByteOrder(dst.iundefsym); + sys::swapByteOrder(dst.nundefsym); + sys::swapByteOrder(dst.tocoff); + sys::swapByteOrder(dst.ntoc); + sys::swapByteOrder(dst.modtaboff); + sys::swapByteOrder(dst.nmodtab); + sys::swapByteOrder(dst.extrefsymoff); + sys::swapByteOrder(dst.nextrefsyms); + sys::swapByteOrder(dst.indirectsymoff); + sys::swapByteOrder(dst.nindirectsyms); + sys::swapByteOrder(dst.extreloff); + sys::swapByteOrder(dst.nextrel); + sys::swapByteOrder(dst.locreloff); + sys::swapByteOrder(dst.nlocrel); +} + +inline void swapStruct(any_relocation_info &reloc) { + sys::swapByteOrder(reloc.r_word0); + sys::swapByteOrder(reloc.r_word1); +} + +inline void swapStruct(nlist_base &S) { + sys::swapByteOrder(S.n_strx); + sys::swapByteOrder(S.n_desc); +} + +inline void swapStruct(nlist &sym) { + sys::swapByteOrder(sym.n_strx); + sys::swapByteOrder(sym.n_desc); + sys::swapByteOrder(sym.n_value); +} + +inline void swapStruct(nlist_64 &sym) { + sys::swapByteOrder(sym.n_strx); + sys::swapByteOrder(sym.n_desc); + sys::swapByteOrder(sym.n_value); +} + +inline void swapStruct(linkedit_data_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.dataoff); + sys::swapByteOrder(C.datasize); +} + +inline void swapStruct(linker_option_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.count); +} + +inline void swapStruct(version_min_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.version); + sys::swapByteOrder(C.sdk); +} + +inline void swapStruct(note_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.size); +} + +inline void swapStruct(build_version_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.platform); + sys::swapByteOrder(C.minos); + sys::swapByteOrder(C.sdk); + sys::swapByteOrder(C.ntools); +} + +inline void swapStruct(build_tool_version &C) { + sys::swapByteOrder(C.tool); + sys::swapByteOrder(C.version); +} + +inline void swapStruct(data_in_code_entry &C) { + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.length); + sys::swapByteOrder(C.kind); +} + +inline void swapStruct(uint32_t &C) { sys::swapByteOrder(C); } + +// The prebind_cksum_command is obsolete and no longer supported. +inline void swapStruct(prebind_cksum_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.cksum); +} + +// The twolevel_hints_command is obsolete and no longer supported. +inline void swapStruct(twolevel_hints_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.nhints); +} + +// The prebound_dylib_command is obsolete and no longer supported. +inline void swapStruct(prebound_dylib_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.nmodules); + sys::swapByteOrder(C.linked_modules); +} + +// The fvmfile_command is obsolete and no longer supported. +inline void swapStruct(fvmfile_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.header_addr); +} + +// The symseg_command is obsolete and no longer supported. +inline void swapStruct(symseg_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.size); +} + +// The ident_command is obsolete and no longer supported. +inline void swapStruct(ident_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); +} + +inline void swapStruct(fvmlib &C) { + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.minor_version); + sys::swapByteOrder(C.header_addr); +} + +// The fvmlib_command is obsolete and no longer supported. +inline void swapStruct(fvmlib_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + swapStruct(C.fvmlib); +} + +// Get/Set functions from + +static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) { + return (((n_desc) >> 8u) & 0xffu); +} + +static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) { + n_desc = (((n_desc)&0x00ff) | (((ordinal)&0xff) << 8)); +} + +static inline uint8_t GET_COMM_ALIGN(uint16_t n_desc) { + return (n_desc >> 8u) & 0x0fu; +} + +static inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) { + n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u)); +} + +// Enums from +enum : uint32_t { + // Capability bits used in the definition of cpu_type. + CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits + CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI +}; + +// Constants for the cputype field. +enum CPUType { + CPU_TYPE_ANY = -1, + CPU_TYPE_X86 = 7, + CPU_TYPE_I386 = CPU_TYPE_X86, + CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64, + /* CPU_TYPE_MIPS = 8, */ + CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC + CPU_TYPE_ARM = 12, + CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64, + CPU_TYPE_SPARC = 14, + CPU_TYPE_POWERPC = 18, + CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64 +}; + +enum : uint32_t { + // Capability bits used in the definition of cpusubtype. + CPU_SUBTYPE_MASK = 0xff000000, // Mask for architecture bits + CPU_SUBTYPE_LIB64 = 0x80000000, // 64 bit libraries + + // Special CPU subtype constants. + CPU_SUBTYPE_MULTIPLE = ~0u +}; + +// Constants for the cpusubtype field. +enum CPUSubTypeX86 { + CPU_SUBTYPE_I386_ALL = 3, + CPU_SUBTYPE_386 = 3, + CPU_SUBTYPE_486 = 4, + CPU_SUBTYPE_486SX = 0x84, + CPU_SUBTYPE_586 = 5, + CPU_SUBTYPE_PENT = CPU_SUBTYPE_586, + CPU_SUBTYPE_PENTPRO = 0x16, + CPU_SUBTYPE_PENTII_M3 = 0x36, + CPU_SUBTYPE_PENTII_M5 = 0x56, + CPU_SUBTYPE_CELERON = 0x67, + CPU_SUBTYPE_CELERON_MOBILE = 0x77, + CPU_SUBTYPE_PENTIUM_3 = 0x08, + CPU_SUBTYPE_PENTIUM_3_M = 0x18, + CPU_SUBTYPE_PENTIUM_3_XEON = 0x28, + CPU_SUBTYPE_PENTIUM_M = 0x09, + CPU_SUBTYPE_PENTIUM_4 = 0x0a, + CPU_SUBTYPE_PENTIUM_4_M = 0x1a, + CPU_SUBTYPE_ITANIUM = 0x0b, + CPU_SUBTYPE_ITANIUM_2 = 0x1b, + CPU_SUBTYPE_XEON = 0x0c, + CPU_SUBTYPE_XEON_MP = 0x1c, + + CPU_SUBTYPE_X86_ALL = 3, + CPU_SUBTYPE_X86_64_ALL = 3, + CPU_SUBTYPE_X86_ARCH1 = 4, + CPU_SUBTYPE_X86_64_H = 8 +}; +static inline int CPU_SUBTYPE_INTEL(int Family, int Model) { + return Family | (Model << 4); +} +static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) { + return ((int)ST) & 0x0f; +} +static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) { + return ((int)ST) >> 4; +} +enum { CPU_SUBTYPE_INTEL_FAMILY_MAX = 15, CPU_SUBTYPE_INTEL_MODEL_ALL = 0 }; + +enum CPUSubTypeARM { + CPU_SUBTYPE_ARM_ALL = 0, + CPU_SUBTYPE_ARM_V4T = 5, + CPU_SUBTYPE_ARM_V6 = 6, + CPU_SUBTYPE_ARM_V5 = 7, + CPU_SUBTYPE_ARM_V5TEJ = 7, + CPU_SUBTYPE_ARM_XSCALE = 8, + CPU_SUBTYPE_ARM_V7 = 9, + // unused ARM_V7F = 10, + CPU_SUBTYPE_ARM_V7S = 11, + CPU_SUBTYPE_ARM_V7K = 12, + CPU_SUBTYPE_ARM_V6M = 14, + CPU_SUBTYPE_ARM_V7M = 15, + CPU_SUBTYPE_ARM_V7EM = 16 +}; + +enum CPUSubTypeARM64 { CPU_SUBTYPE_ARM64_ALL = 0 }; + +enum CPUSubTypeSPARC { CPU_SUBTYPE_SPARC_ALL = 0 }; + +enum CPUSubTypePowerPC { + CPU_SUBTYPE_POWERPC_ALL = 0, + CPU_SUBTYPE_POWERPC_601 = 1, + CPU_SUBTYPE_POWERPC_602 = 2, + CPU_SUBTYPE_POWERPC_603 = 3, + CPU_SUBTYPE_POWERPC_603e = 4, + CPU_SUBTYPE_POWERPC_603ev = 5, + CPU_SUBTYPE_POWERPC_604 = 6, + CPU_SUBTYPE_POWERPC_604e = 7, + CPU_SUBTYPE_POWERPC_620 = 8, + CPU_SUBTYPE_POWERPC_750 = 9, + CPU_SUBTYPE_POWERPC_7400 = 10, + CPU_SUBTYPE_POWERPC_7450 = 11, + CPU_SUBTYPE_POWERPC_970 = 100, + + CPU_SUBTYPE_MC980000_ALL = CPU_SUBTYPE_POWERPC_ALL, + CPU_SUBTYPE_MC98601 = CPU_SUBTYPE_POWERPC_601 +}; + +struct x86_thread_state32_t { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t edi; + uint32_t esi; + uint32_t ebp; + uint32_t esp; + uint32_t ss; + uint32_t eflags; + uint32_t eip; + uint32_t cs; + uint32_t ds; + uint32_t es; + uint32_t fs; + uint32_t gs; +}; + +struct x86_thread_state64_t { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rdi; + uint64_t rsi; + uint64_t rbp; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; + uint64_t rflags; + uint64_t cs; + uint64_t fs; + uint64_t gs; +}; + +enum x86_fp_control_precis { + x86_FP_PREC_24B = 0, + x86_FP_PREC_53B = 2, + x86_FP_PREC_64B = 3 +}; + +enum x86_fp_control_rc { + x86_FP_RND_NEAR = 0, + x86_FP_RND_DOWN = 1, + x86_FP_RND_UP = 2, + x86_FP_CHOP = 3 +}; + +struct fp_control_t { + unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1, + precis : 1, : 2, pc : 2, rc : 2, : 1, : 3; +}; + +struct fp_status_t { + unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1, + precis : 1, stkflt : 1, errsumm : 1, c0 : 1, c1 : 1, c2 : 1, tos : 3, + c3 : 1, busy : 1; +}; + +struct mmst_reg_t { + char mmst_reg[10]; + char mmst_rsrv[6]; +}; + +struct xmm_reg_t { + char xmm_reg[16]; +}; + +struct x86_float_state64_t { + int32_t fpu_reserved[2]; + fp_control_t fpu_fcw; + fp_status_t fpu_fsw; + uint8_t fpu_ftw; + uint8_t fpu_rsrv1; + uint16_t fpu_fop; + uint32_t fpu_ip; + uint16_t fpu_cs; + uint16_t fpu_rsrv2; + uint32_t fpu_dp; + uint16_t fpu_ds; + uint16_t fpu_rsrv3; + uint32_t fpu_mxcsr; + uint32_t fpu_mxcsrmask; + mmst_reg_t fpu_stmm0; + mmst_reg_t fpu_stmm1; + mmst_reg_t fpu_stmm2; + mmst_reg_t fpu_stmm3; + mmst_reg_t fpu_stmm4; + mmst_reg_t fpu_stmm5; + mmst_reg_t fpu_stmm6; + mmst_reg_t fpu_stmm7; + xmm_reg_t fpu_xmm0; + xmm_reg_t fpu_xmm1; + xmm_reg_t fpu_xmm2; + xmm_reg_t fpu_xmm3; + xmm_reg_t fpu_xmm4; + xmm_reg_t fpu_xmm5; + xmm_reg_t fpu_xmm6; + xmm_reg_t fpu_xmm7; + xmm_reg_t fpu_xmm8; + xmm_reg_t fpu_xmm9; + xmm_reg_t fpu_xmm10; + xmm_reg_t fpu_xmm11; + xmm_reg_t fpu_xmm12; + xmm_reg_t fpu_xmm13; + xmm_reg_t fpu_xmm14; + xmm_reg_t fpu_xmm15; + char fpu_rsrv4[6 * 16]; + uint32_t fpu_reserved1; +}; + +struct x86_exception_state64_t { + uint16_t trapno; + uint16_t cpu; + uint32_t err; + uint64_t faultvaddr; +}; + +inline void swapStruct(x86_thread_state32_t &x) { + sys::swapByteOrder(x.eax); + sys::swapByteOrder(x.ebx); + sys::swapByteOrder(x.ecx); + sys::swapByteOrder(x.edx); + sys::swapByteOrder(x.edi); + sys::swapByteOrder(x.esi); + sys::swapByteOrder(x.ebp); + sys::swapByteOrder(x.esp); + sys::swapByteOrder(x.ss); + sys::swapByteOrder(x.eflags); + sys::swapByteOrder(x.eip); + sys::swapByteOrder(x.cs); + sys::swapByteOrder(x.ds); + sys::swapByteOrder(x.es); + sys::swapByteOrder(x.fs); + sys::swapByteOrder(x.gs); +} + +inline void swapStruct(x86_thread_state64_t &x) { + sys::swapByteOrder(x.rax); + sys::swapByteOrder(x.rbx); + sys::swapByteOrder(x.rcx); + sys::swapByteOrder(x.rdx); + sys::swapByteOrder(x.rdi); + sys::swapByteOrder(x.rsi); + sys::swapByteOrder(x.rbp); + sys::swapByteOrder(x.rsp); + sys::swapByteOrder(x.r8); + sys::swapByteOrder(x.r9); + sys::swapByteOrder(x.r10); + sys::swapByteOrder(x.r11); + sys::swapByteOrder(x.r12); + sys::swapByteOrder(x.r13); + sys::swapByteOrder(x.r14); + sys::swapByteOrder(x.r15); + sys::swapByteOrder(x.rip); + sys::swapByteOrder(x.rflags); + sys::swapByteOrder(x.cs); + sys::swapByteOrder(x.fs); + sys::swapByteOrder(x.gs); +} + +inline void swapStruct(x86_float_state64_t &x) { + sys::swapByteOrder(x.fpu_reserved[0]); + sys::swapByteOrder(x.fpu_reserved[1]); + // TODO swap: fp_control_t fpu_fcw; + // TODO swap: fp_status_t fpu_fsw; + sys::swapByteOrder(x.fpu_fop); + sys::swapByteOrder(x.fpu_ip); + sys::swapByteOrder(x.fpu_cs); + sys::swapByteOrder(x.fpu_rsrv2); + sys::swapByteOrder(x.fpu_dp); + sys::swapByteOrder(x.fpu_ds); + sys::swapByteOrder(x.fpu_rsrv3); + sys::swapByteOrder(x.fpu_mxcsr); + sys::swapByteOrder(x.fpu_mxcsrmask); + sys::swapByteOrder(x.fpu_reserved1); +} + +inline void swapStruct(x86_exception_state64_t &x) { + sys::swapByteOrder(x.trapno); + sys::swapByteOrder(x.cpu); + sys::swapByteOrder(x.err); + sys::swapByteOrder(x.faultvaddr); +} + +struct x86_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct x86_thread_state_t { + x86_state_hdr_t tsh; + union { + x86_thread_state64_t ts64; + x86_thread_state32_t ts32; + } uts; +}; + +struct x86_float_state_t { + x86_state_hdr_t fsh; + union { + x86_float_state64_t fs64; + } ufs; +}; + +struct x86_exception_state_t { + x86_state_hdr_t esh; + union { + x86_exception_state64_t es64; + } ues; +}; + +inline void swapStruct(x86_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum X86ThreadFlavors { + x86_THREAD_STATE32 = 1, + x86_FLOAT_STATE32 = 2, + x86_EXCEPTION_STATE32 = 3, + x86_THREAD_STATE64 = 4, + x86_FLOAT_STATE64 = 5, + x86_EXCEPTION_STATE64 = 6, + x86_THREAD_STATE = 7, + x86_FLOAT_STATE = 8, + x86_EXCEPTION_STATE = 9, + x86_DEBUG_STATE32 = 10, + x86_DEBUG_STATE64 = 11, + x86_DEBUG_STATE = 12 +}; + +inline void swapStruct(x86_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == x86_THREAD_STATE64) + swapStruct(x.uts.ts64); +} + +inline void swapStruct(x86_float_state_t &x) { + swapStruct(x.fsh); + if (x.fsh.flavor == x86_FLOAT_STATE64) + swapStruct(x.ufs.fs64); +} + +inline void swapStruct(x86_exception_state_t &x) { + swapStruct(x.esh); + if (x.esh.flavor == x86_EXCEPTION_STATE64) + swapStruct(x.ues.es64); +} + +const uint32_t x86_THREAD_STATE32_COUNT = + sizeof(x86_thread_state32_t) / sizeof(uint32_t); + +const uint32_t x86_THREAD_STATE64_COUNT = + sizeof(x86_thread_state64_t) / sizeof(uint32_t); +const uint32_t x86_FLOAT_STATE64_COUNT = + sizeof(x86_float_state64_t) / sizeof(uint32_t); +const uint32_t x86_EXCEPTION_STATE64_COUNT = + sizeof(x86_exception_state64_t) / sizeof(uint32_t); + +const uint32_t x86_THREAD_STATE_COUNT = + sizeof(x86_thread_state_t) / sizeof(uint32_t); +const uint32_t x86_FLOAT_STATE_COUNT = + sizeof(x86_float_state_t) / sizeof(uint32_t); +const uint32_t x86_EXCEPTION_STATE_COUNT = + sizeof(x86_exception_state_t) / sizeof(uint32_t); + +struct arm_thread_state32_t { + uint32_t r[13]; + uint32_t sp; + uint32_t lr; + uint32_t pc; + uint32_t cpsr; +}; + +inline void swapStruct(arm_thread_state32_t &x) { + for (int i = 0; i < 13; i++) + sys::swapByteOrder(x.r[i]); + sys::swapByteOrder(x.sp); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.pc); + sys::swapByteOrder(x.cpsr); +} + +struct arm_thread_state64_t { + uint64_t x[29]; + uint64_t fp; + uint64_t lr; + uint64_t sp; + uint64_t pc; + uint32_t cpsr; + uint32_t pad; +}; + +inline void swapStruct(arm_thread_state64_t &x) { + for (int i = 0; i < 29; i++) + sys::swapByteOrder(x.x[i]); + sys::swapByteOrder(x.fp); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.sp); + sys::swapByteOrder(x.pc); + sys::swapByteOrder(x.cpsr); +} + +struct arm_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct arm_thread_state_t { + arm_state_hdr_t tsh; + union { + arm_thread_state32_t ts32; + } uts; +}; + +inline void swapStruct(arm_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum ARMThreadFlavors { + ARM_THREAD_STATE = 1, + ARM_VFP_STATE = 2, + ARM_EXCEPTION_STATE = 3, + ARM_DEBUG_STATE = 4, + ARN_THREAD_STATE_NONE = 5, + ARM_THREAD_STATE64 = 6, + ARM_EXCEPTION_STATE64 = 7 +}; + +inline void swapStruct(arm_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == ARM_THREAD_STATE) + swapStruct(x.uts.ts32); +} + +const uint32_t ARM_THREAD_STATE_COUNT = + sizeof(arm_thread_state32_t) / sizeof(uint32_t); + +const uint32_t ARM_THREAD_STATE64_COUNT = + sizeof(arm_thread_state64_t) / sizeof(uint32_t); + +struct ppc_thread_state32_t { + uint32_t srr0; + uint32_t srr1; + uint32_t r0; + uint32_t r1; + uint32_t r2; + uint32_t r3; + uint32_t r4; + uint32_t r5; + uint32_t r6; + uint32_t r7; + uint32_t r8; + uint32_t r9; + uint32_t r10; + uint32_t r11; + uint32_t r12; + uint32_t r13; + uint32_t r14; + uint32_t r15; + uint32_t r16; + uint32_t r17; + uint32_t r18; + uint32_t r19; + uint32_t r20; + uint32_t r21; + uint32_t r22; + uint32_t r23; + uint32_t r24; + uint32_t r25; + uint32_t r26; + uint32_t r27; + uint32_t r28; + uint32_t r29; + uint32_t r30; + uint32_t r31; + uint32_t ct; + uint32_t xer; + uint32_t lr; + uint32_t ctr; + uint32_t mq; + uint32_t vrsave; +}; + +inline void swapStruct(ppc_thread_state32_t &x) { + sys::swapByteOrder(x.srr0); + sys::swapByteOrder(x.srr1); + sys::swapByteOrder(x.r0); + sys::swapByteOrder(x.r1); + sys::swapByteOrder(x.r2); + sys::swapByteOrder(x.r3); + sys::swapByteOrder(x.r4); + sys::swapByteOrder(x.r5); + sys::swapByteOrder(x.r6); + sys::swapByteOrder(x.r7); + sys::swapByteOrder(x.r8); + sys::swapByteOrder(x.r9); + sys::swapByteOrder(x.r10); + sys::swapByteOrder(x.r11); + sys::swapByteOrder(x.r12); + sys::swapByteOrder(x.r13); + sys::swapByteOrder(x.r14); + sys::swapByteOrder(x.r15); + sys::swapByteOrder(x.r16); + sys::swapByteOrder(x.r17); + sys::swapByteOrder(x.r18); + sys::swapByteOrder(x.r19); + sys::swapByteOrder(x.r20); + sys::swapByteOrder(x.r21); + sys::swapByteOrder(x.r22); + sys::swapByteOrder(x.r23); + sys::swapByteOrder(x.r24); + sys::swapByteOrder(x.r25); + sys::swapByteOrder(x.r26); + sys::swapByteOrder(x.r27); + sys::swapByteOrder(x.r28); + sys::swapByteOrder(x.r29); + sys::swapByteOrder(x.r30); + sys::swapByteOrder(x.r31); + sys::swapByteOrder(x.ct); + sys::swapByteOrder(x.xer); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.ctr); + sys::swapByteOrder(x.mq); + sys::swapByteOrder(x.vrsave); +} + +struct ppc_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct ppc_thread_state_t { + ppc_state_hdr_t tsh; + union { + ppc_thread_state32_t ts32; + } uts; +}; + +inline void swapStruct(ppc_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum PPCThreadFlavors { + PPC_THREAD_STATE = 1, + PPC_FLOAT_STATE = 2, + PPC_EXCEPTION_STATE = 3, + PPC_VECTOR_STATE = 4, + PPC_THREAD_STATE64 = 5, + PPC_EXCEPTION_STATE64 = 6, + PPC_THREAD_STATE_NONE = 7 +}; + +inline void swapStruct(ppc_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == PPC_THREAD_STATE) + swapStruct(x.uts.ts32); +} + +const uint32_t PPC_THREAD_STATE_COUNT = + sizeof(ppc_thread_state32_t) / sizeof(uint32_t); + +// Define a union of all load command structs +#define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data; + +union macho_load_command { +#include "llvm/BinaryFormat/MachO.def" +}; + +} // end namespace MachO +} // end namespace llvm + +#endif diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h new file mode 100644 index 000000000000..c0e23db5e1ae --- /dev/null +++ b/include/llvm/BinaryFormat/Magic.h @@ -0,0 +1,73 @@ +//===- llvm/BinaryFormat/Magic.h - File magic identification ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_MAGIC_H +#define LLVM_BINARYFORMAT_MAGIC_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" + +#include + +namespace llvm { +/// file_magic - An "enum class" enumeration of file types based on magic (the +/// first N bytes of the file). +struct file_magic { + enum Impl { + unknown = 0, ///< Unrecognized file + bitcode, ///< Bitcode file + archive, ///< ar style archive file + elf, ///< ELF Unknown type + elf_relocatable, ///< ELF Relocatable object file + elf_executable, ///< ELF Executable image + elf_shared_object, ///< ELF dynamically linked shared lib + elf_core, ///< ELF core image + macho_object, ///< Mach-O Object file + macho_executable, ///< Mach-O Executable + macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM + macho_core, ///< Mach-O Core File + macho_preload_executable, ///< Mach-O Preloaded Executable + macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib + macho_dynamic_linker, ///< The Mach-O dynamic linker + macho_bundle, ///< Mach-O Bundle file + macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub + macho_dsym_companion, ///< Mach-O dSYM companion file + macho_kext_bundle, ///< Mach-O kext bundle file + macho_universal_binary, ///< Mach-O universal binary + coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file + coff_object, ///< COFF object file + coff_import_library, ///< COFF import library + pecoff_executable, ///< PECOFF executable file + windows_resource, ///< Windows compiled resource file (.res) + wasm_object ///< WebAssembly Object file + }; + + bool is_object() const { return V != unknown; } + + file_magic() = default; + file_magic(Impl V) : V(V) {} + operator Impl() const { return V; } + +private: + Impl V = unknown; +}; + +/// @brief Identify the type of a binary file based on how magical it is. +file_magic identify_magic(StringRef magic); + +/// @brief Get and identify \a path's type based on its content. +/// +/// @param path Input path. +/// @param result Set to the type of file, or file_magic::unknown. +/// @returns errc::success if result has been successfully set, otherwise a +/// platform-specific error_code. +std::error_code identify_magic(const Twine &path, file_magic &result); +} // namespace llvm + +#endif diff --git a/include/llvm/Support/Wasm.h b/include/llvm/BinaryFormat/Wasm.h similarity index 80% rename from include/llvm/Support/Wasm.h rename to include/llvm/BinaryFormat/Wasm.h index e3831827062c..fcd8ad957040 100644 --- a/include/llvm/Support/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_WASM_H -#define LLVM_SUPPORT_WASM_H +#ifndef LLVM_BINARYFORMAT_WASM_H +#define LLVM_BINARYFORMAT_WASM_H #include "llvm/ADT/ArrayRef.h" @@ -106,10 +106,10 @@ struct WasmElemSegment { }; struct WasmRelocation { - uint32_t Type; // The type of the relocation. - int32_t Index; // Index into function to global index space. - uint64_t Offset; // Offset from the start of the section. - int64_t Addend; // A value to add to the symbol. + uint32_t Type; // The type of the relocation. + int32_t Index; // Index into function to global index space. + uint64_t Offset; // Offset from the start of the section. + int64_t Addend; // A value to add to the symbol. }; enum : unsigned { @@ -129,36 +129,36 @@ enum : unsigned { // Type immediate encodings used in various contexts. enum { - WASM_TYPE_I32 = -0x01, - WASM_TYPE_I64 = -0x02, - WASM_TYPE_F32 = -0x03, - WASM_TYPE_F64 = -0x04, - WASM_TYPE_ANYFUNC = -0x10, - WASM_TYPE_FUNC = -0x20, - WASM_TYPE_NORESULT = -0x40, // for blocks with no result values + WASM_TYPE_I32 = -0x01, + WASM_TYPE_I64 = -0x02, + WASM_TYPE_F32 = -0x03, + WASM_TYPE_F64 = -0x04, + WASM_TYPE_ANYFUNC = -0x10, + WASM_TYPE_FUNC = -0x20, + WASM_TYPE_NORESULT = -0x40, // for blocks with no result values }; // Kinds of externals (for imports and exports). enum : unsigned { WASM_EXTERNAL_FUNCTION = 0x0, - WASM_EXTERNAL_TABLE = 0x1, - WASM_EXTERNAL_MEMORY = 0x2, - WASM_EXTERNAL_GLOBAL = 0x3, + WASM_EXTERNAL_TABLE = 0x1, + WASM_EXTERNAL_MEMORY = 0x2, + WASM_EXTERNAL_GLOBAL = 0x3, }; // Opcodes used in initializer expressions. enum : unsigned { - WASM_OPCODE_END = 0x0b, + WASM_OPCODE_END = 0x0b, WASM_OPCODE_GET_GLOBAL = 0x23, - WASM_OPCODE_I32_CONST = 0x41, - WASM_OPCODE_I64_CONST = 0x42, - WASM_OPCODE_F32_CONST = 0x43, - WASM_OPCODE_F64_CONST = 0x44, + WASM_OPCODE_I32_CONST = 0x41, + WASM_OPCODE_I64_CONST = 0x42, + WASM_OPCODE_F32_CONST = 0x43, + WASM_OPCODE_F64_CONST = 0x44, }; enum : unsigned { - WASM_NAMES_FUNCTION = 0x1, - WASM_NAMES_LOCAL = 0x2, + WASM_NAMES_FUNCTION = 0x1, + WASM_NAMES_LOCAL = 0x2, }; enum : unsigned { diff --git a/include/llvm/Support/WasmRelocs/WebAssembly.def b/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def similarity index 100% rename from include/llvm/Support/WasmRelocs/WebAssembly.def rename to include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h index 31ffb7645f3a..61e4f6351b19 100644 --- a/include/llvm/Bitcode/BitcodeReader.h +++ b/include/llvm/Bitcode/BitcodeReader.h @@ -40,6 +40,8 @@ namespace llvm { return std::move(*Val); } + struct BitcodeFileContents; + /// Represents a module in a bitcode file. class BitcodeModule { // This covers the identification (if present) and module blocks. @@ -61,8 +63,8 @@ namespace llvm { IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} // Calls the ctor. - friend Expected> - getBitcodeModuleList(MemoryBufferRef Buffer); + friend Expected + getBitcodeFileContents(MemoryBufferRef Buffer); Expected> getModuleImpl(LLVMContext &Context, bool MaterializeAll, @@ -99,6 +101,13 @@ namespace llvm { Error readSummary(ModuleSummaryIndex &CombinedIndex, unsigned ModuleId); }; + struct BitcodeFileContents { + std::vector Mods; + }; + + /// Returns the contents of a bitcode file. + Expected getBitcodeFileContents(MemoryBufferRef Buffer); + /// Returns a list of modules in the specified bitcode buffer. Expected> getBitcodeModuleList(MemoryBufferRef Buffer); diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 8ee1e4b583b6..a643bfd1dcea 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -55,6 +55,8 @@ enum BlockIDs { METADATA_KIND_BLOCK_ID, STRTAB_BLOCK_ID, + + FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 32542fa87463..9e33df6b55ec 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -17,11 +17,11 @@ #define LLVM_CODEGEN_BASICTTIIMPL_H #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { @@ -117,6 +117,10 @@ public: return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace); } + bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { + return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); + } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { TargetLoweringBase::AddrMode AM; @@ -1080,46 +1084,46 @@ public: return 0; } + /// Try to calculate arithmetic and shuffle op costs for reduction operations. + /// We're assuming that reduction operation are performing the following way: + /// 1. Non-pairwise reduction + /// %val1 = shufflevector %val, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val, val1 + /// After this operation we have a vector %red1 where only the first n/2 + /// elements are meaningful, the second n/2 elements are undefined and can be + /// dropped. All other operations are actually working with the vector of + /// length n/2, not n, though the real vector length is still n. + /// %val2 = shufflevector %red1, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/4 elements 3*n/4 elements + /// %red2 = op %red1, val2 - working with the vector of + /// length n/2, the resulting vector has length n/4 etc. + /// 2. Pairwise reduction: + /// Everything is the same except for an additional shuffle operation which + /// is used to produce operands for pairwise kind of reductions. + /// %val1 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %val2 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val1, val2 + /// Again, the operation is performed on vector, but the resulting + /// vector %red1 is vector. + /// + /// The cost model should take into account that the actual length of the + /// vector is reduced on each iteration. unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) { assert(Ty->isVectorTy() && "Expect a vector type"); Type *ScalarTy = Ty->getVectorElementType(); unsigned NumVecElts = Ty->getVectorNumElements(); unsigned NumReduxLevels = Log2_32(NumVecElts); - // Try to calculate arithmetic and shuffle op costs for reduction operations. - // We're assuming that reduction operation are performing the following way: - // 1. Non-pairwise reduction - // %val1 = shufflevector %val, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val, val1 - // After this operation we have a vector %red1 with only maningfull the - // first n/2 elements, the second n/2 elements are undefined and can be - // dropped. All other operations are actually working with the vector of - // length n/2, not n. though the real vector length is still n. - // %val2 = shufflevector %red1, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/4 elements 3*n/4 elements - // %red2 = op %red1, val2 - working with the vector of - // length n/2, the resulting vector has length n/4 etc. - // 2. Pairwise reduction: - // Everything is the same except for an additional shuffle operation which - // is used to produce operands for pairwise kind of reductions. - // %val1 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %val2 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val1, val2 - // Again, the operation is performed on vector, but the resulting - // vector %red1 is vector. - // - // The cost model should take into account that the actual length of the - // vector is reduced on each iteration. unsigned ArithCost = 0; unsigned ShuffleCost = 0; auto *ConcreteTTI = static_cast(this); diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index 8de140e91bf3..77c37ac7abea 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-=====// +//===- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -29,17 +29,22 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include #include +#include +#include +#include namespace llvm { -class MCInstrDesc; +class DefaultVLIWScheduler; +class InstrItineraryData; +class MachineFunction; class MachineInstr; class MachineLoopInfo; -class MachineDominatorTree; -class InstrItineraryData; -class DefaultVLIWScheduler; +class MCInstrDesc; class SUnit; +class TargetInstrInfo; // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp @@ -64,17 +69,18 @@ class SUnit; #define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. #define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. -typedef uint64_t DFAInput; -typedef int64_t DFAStateInput; +using DFAInput = uint64_t; +using DFAStateInput = int64_t; + #define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. // -------------------------------------------------------------------- class DFAPacketizer { private: - typedef std::pair UnsignPair; + using UnsignPair = std::pair; const InstrItineraryData *InstrItins; - int CurrentState; + int CurrentState = 0; const DFAStateInput (*DFAStateInputTable)[2]; const unsigned *DFAStateEntryTable; @@ -101,24 +107,23 @@ public: // Check if the resources occupied by a MCInstrDesc are available in // the current state. - bool canReserveResources(const llvm::MCInstrDesc *MID); + bool canReserveResources(const MCInstrDesc *MID); // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. - void reserveResources(const llvm::MCInstrDesc *MID); + void reserveResources(const MCInstrDesc *MID); // Check if the resources occupied by a machine instruction are available // in the current state. - bool canReserveResources(llvm::MachineInstr &MI); + bool canReserveResources(MachineInstr &MI); // Reserve the resources occupied by a machine instruction and change the // current state to reflect that change. - void reserveResources(llvm::MachineInstr &MI); + void reserveResources(MachineInstr &MI); const InstrItineraryData *getInstrItins() const { return InstrItins; } }; - // VLIWPacketizerList implements a simple VLIW packetizer using DFA. The // packetizer works on machine basic blocks. For each instruction I in BB, // the packetizer consults the DFA to see if machine resources are available @@ -205,6 +210,6 @@ public: void addMutation(std::unique_ptr Mutation); }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_DFAPACKETIZER_H diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index 4f47ba6e3852..5ed5faa2c415 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -21,10 +21,10 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Dwarf.h" #include #include #include diff --git a/include/llvm/CodeGen/ExecutionDepsFix.h b/include/llvm/CodeGen/ExecutionDepsFix.h index 1d5b9684e105..f4db8b7322da 100644 --- a/include/llvm/CodeGen/ExecutionDepsFix.h +++ b/include/llvm/CodeGen/ExecutionDepsFix.h @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-=// +//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -20,19 +20,30 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H #define LLVM_CODEGEN_EXECUTIONDEPSFIX_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include #include namespace llvm { +class MachineBasicBlock; +class MachineInstr; +class TargetInstrInfo; + /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track /// of execution domains. /// @@ -50,7 +61,7 @@ namespace llvm { /// domains. struct DomainValue { // Basic reference counting. - unsigned Refs; + unsigned Refs = 0; // Bitmask of available domains. For an open DomainValue, it is the still // possible domains for collapsing. For a collapsed DomainValue it is the @@ -65,6 +76,8 @@ struct DomainValue { // Twiddleable instructions using or defining these registers. SmallVector Instrs; + DomainValue() { clear(); } + // A collapsed DomainValue has no instructions to twiddle - it simply keeps // track of the domains where the registers are already available. bool isCollapsed() const { return Instrs.empty(); } @@ -97,8 +110,6 @@ struct DomainValue { return countTrailingZeros(AvailableDomains); } - DomainValue() : Refs(0) { clear(); } - // Clear this DomainValue and point to next which has all its data. void clear() { AvailableDomains = 0; @@ -136,29 +147,27 @@ class ExecutionDepsFix : public MachineFunctionPass { // Keeps clearance and domain information for all registers. Note that this // is different from the usual definition notion of liveness. The CPU // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs; + LiveReg *OutRegs = nullptr; // Whether we have gotten to this block in primary processing yet. - bool PrimaryCompleted; + bool PrimaryCompleted = false; // The number of predecessors for which primary processing has completed - unsigned IncomingProcessed; + unsigned IncomingProcessed = 0; // The value of `IncomingProcessed` at the start of primary processing - unsigned PrimaryIncoming; + unsigned PrimaryIncoming = 0; // The number of predecessors for which all processing steps are done. - unsigned IncomingCompleted; + unsigned IncomingCompleted = 0; - MBBInfo() - : OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0), - PrimaryIncoming(0), IncomingCompleted(0) {} + MBBInfo() = default; }; - typedef DenseMap MBBInfoMap; + using MBBInfoMap = DenseMap; MBBInfoMap MBBInfos; /// List of undefined register reads in this block in forward order. - std::vector > UndefReads; + std::vector> UndefReads; /// Storage for register unit liveness. LivePhysRegs LiveRegSet; @@ -166,6 +175,7 @@ class ExecutionDepsFix : public MachineFunctionPass { /// Current instruction number. /// The first instruction in each basic block is 0. int CurInstr; + public: ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} @@ -217,4 +227,4 @@ private: } // end namepsace llvm -#endif +#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 57fa0c73d272..74e4179e73e9 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -17,11 +17,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" @@ -30,19 +31,43 @@ #include #include #include -#include namespace llvm { +class AllocaInst; +class BasicBlock; +class CallInst; +class Constant; +class ConstantFP; +class DataLayout; +class FunctionLoweringInfo; +class LoadInst; class MachineConstantPool; +class MachineFrameInfo; +class MachineFunction; +class MachineInstr; +class MachineMemOperand; +class MachineOperand; +class MachineRegisterInfo; +class MCContext; +class MCInstrDesc; +class MCSymbol; +class TargetInstrInfo; +class TargetLibraryInfo; +class TargetMachine; +class TargetRegisterClass; +class TargetRegisterInfo; +class Type; +class User; +class Value; /// \brief This is a fast-path instruction selection class that generates poor /// code and doesn't support illegal types or non-trivial lowering, but runs /// quickly. class FastISel { public: - typedef TargetLoweringBase::ArgListEntry ArgListEntry; - typedef TargetLoweringBase::ArgListTy ArgListTy; + using ArgListEntry = TargetLoweringBase::ArgListEntry; + using ArgListTy = TargetLoweringBase::ArgListTy; struct CallLoweringInfo { Type *RetTy = nullptr; bool RetSExt : 1; @@ -202,6 +227,8 @@ protected: MachineInstr *EmitStartPt; public: + virtual ~FastISel(); + /// \brief Return the position of the last instruction emitted for /// materializing constants for use in the current block. MachineInstr *getLastLocalValue() { return LastLocalValue; } @@ -293,8 +320,6 @@ public: /// \brief Reset InsertPt to the given old insert position. void leaveLocalValueArea(SavePoint Old); - virtual ~FastISel(); - protected: explicit FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, @@ -334,7 +359,7 @@ protected: /// \brief This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register and immediate - // operands be emitted. + /// operands be emitted. virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm); diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index e7544bd7b70c..7d7c3e8cfd22 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -1,4 +1,4 @@ -//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// +//===- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen ---===// // // The LLVM Compiler Infrastructure // @@ -23,29 +23,28 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Support/KnownBits.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include #include namespace llvm { -class AllocaInst; +class Argument; class BasicBlock; class BranchProbabilityInfo; class Function; -class GlobalVariable; class Instruction; -class MachineInstr; -class MachineBasicBlock; class MachineFunction; -class MachineModuleInfo; +class MachineInstr; class MachineRegisterInfo; -class SelectionDAG; class MVT; +class SelectionDAG; class TargetLowering; -class Value; //===--------------------------------------------------------------------===// /// FunctionLoweringInfo - This contains information that is global to a @@ -74,25 +73,24 @@ public: /// A map from swifterror value in a basic block to the virtual register it is /// currently represented by. - llvm::DenseMap, unsigned> + DenseMap, unsigned> SwiftErrorVRegDefMap; /// A list of upward exposed vreg uses that need to be satisfied by either a /// copy def or a phi node at the beginning of the basic block representing /// the predecessor(s) swifterror value. - llvm::DenseMap, unsigned> + DenseMap, unsigned> SwiftErrorVRegUpwardsUse; /// The swifterror argument of the current function. const Value *SwiftErrorArg; - typedef SmallVector SwiftErrorValues; + using SwiftErrorValues = SmallVector; /// A function can only have a single swifterror argument. And if it does /// have a swifterror argument, it must be the first entry in /// SwiftErrorVals. SwiftErrorValues SwiftErrorVals; - /// Get or create the swifterror value virtual register in /// SwiftErrorVRegDefMap for this basic block. unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *, @@ -118,7 +116,7 @@ public: /// slot), and we track that here. struct StatepointSpillMap { - typedef DenseMap> SlotMapTy; + using SlotMapTy = DenseMap>; /// Maps uniqued llvm IR values to the slots they were spilled in. If a /// value is mapped to None it means we visited the value but didn't spill @@ -172,8 +170,9 @@ public: struct LiveOutInfo { unsigned NumSignBits : 31; unsigned IsValid : 1; - KnownBits Known; - LiveOutInfo() : NumSignBits(0), IsValid(true), Known(1) {} + KnownBits Known = 1; + + LiveOutInfo() : NumSignBits(0), IsValid(true) {} }; /// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND) @@ -298,4 +297,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index e6afcbc8ded2..ad2599fc120e 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -1,4 +1,4 @@ -//===-- GCMetadata.h - Garbage collector metadata ---------------*- C++ -*-===// +//===- GCMetadata.h - Garbage collector metadata ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -36,15 +36,20 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" +#include +#include +#include #include -#include +#include namespace llvm { -class AsmPrinter; + class Constant; +class Function; class MCSymbol; /// GCPoint - Metadata for a collector-safe point in machine code. @@ -62,20 +67,20 @@ struct GCPoint { /// collector. struct GCRoot { int Num; ///< Usually a frame index. - int StackOffset; ///< Offset from the stack pointer. + int StackOffset = -1; ///< Offset from the stack pointer. const Constant *Metadata; ///< Metadata straight from the call ///< to llvm.gcroot. - GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {} + GCRoot(int N, const Constant *MD) : Num(N), Metadata(MD) {} }; /// Garbage collection metadata for a single function. Currently, this /// information only applies to GCStrategies which use GCRoot. class GCFunctionInfo { public: - typedef std::vector::iterator iterator; - typedef std::vector::iterator roots_iterator; - typedef std::vector::const_iterator live_iterator; + using iterator = std::vector::iterator; + using roots_iterator = std::vector::iterator; + using live_iterator = std::vector::const_iterator; private: const Function &F; @@ -99,11 +104,9 @@ public: ~GCFunctionInfo(); /// getFunction - Return the function to which this metadata applies. - /// const Function &getFunction() const { return F; } /// getStrategy - Return the GC strategy for the function. - /// GCStrategy &getStrategy() { return S; } /// addStackRoot - Registers a root that lives on the stack. Num is the @@ -126,24 +129,20 @@ public: } /// getFrameSize/setFrameSize - Records the function's frame size. - /// uint64_t getFrameSize() const { return FrameSize; } void setFrameSize(uint64_t S) { FrameSize = S; } /// begin/end - Iterators for safe points. - /// iterator begin() { return SafePoints.begin(); } iterator end() { return SafePoints.end(); } size_t size() const { return SafePoints.size(); } /// roots_begin/roots_end - Iterators for all roots in the function. - /// roots_iterator roots_begin() { return Roots.begin(); } roots_iterator roots_end() { return Roots.end(); } size_t roots_size() const { return Roots.size(); } /// live_begin/live_end - Iterators for live roots at a given safe point. - /// live_iterator live_begin(const iterator &p) { return roots_begin(); } live_iterator live_end(const iterator &p) { return roots_end(); } size_t live_size(const iterator &p) const { return roots_size(); } @@ -166,7 +165,7 @@ public: /// List of per function info objects. In theory, Each of these /// may be associated with a different GC. - typedef std::vector> FuncInfoVec; + using FuncInfoVec = std::vector>; FuncInfoVec::iterator funcinfo_begin() { return Functions.begin(); } FuncInfoVec::iterator funcinfo_end() { return Functions.end(); } @@ -177,11 +176,11 @@ private: /// Non-owning map to bypass linear search when finding the GCFunctionInfo /// associated with a particular Function. - typedef DenseMap finfo_map_type; + using finfo_map_type = DenseMap; finfo_map_type FInfoMap; public: - typedef SmallVector,1>::const_iterator iterator; + using iterator = SmallVector, 1>::const_iterator; static char ID; @@ -202,6 +201,7 @@ public: /// will soon change. GCFunctionInfo &getFunctionInfo(const Function &F); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GCMETADATA_H diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h index 220847029113..1cc69a7b71af 100644 --- a/include/llvm/CodeGen/GCMetadataPrinter.h +++ b/include/llvm/CodeGen/GCMetadataPrinter.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables -*- C++ -*-===// +//===- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,45 +20,48 @@ #ifndef LLVM_CODEGEN_GCMETADATAPRINTER_H #define LLVM_CODEGEN_GCMETADATAPRINTER_H -#include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/Support/Registry.h" namespace llvm { +class AsmPrinter; class GCMetadataPrinter; +class GCModuleInfo; +class GCStrategy; +class Module; /// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the /// defaults from Registry. -typedef Registry GCMetadataPrinterRegistry; +using GCMetadataPrinterRegistry = Registry; /// GCMetadataPrinter - Emits GC metadata as assembly code. Instances are /// created, managed, and owned by the AsmPrinter. class GCMetadataPrinter { private: - GCStrategy *S; friend class AsmPrinter; + GCStrategy *S; + protected: // May only be subclassed. GCMetadataPrinter(); -private: +public: GCMetadataPrinter(const GCMetadataPrinter &) = delete; GCMetadataPrinter &operator=(const GCMetadataPrinter &) = delete; + virtual ~GCMetadataPrinter(); -public: GCStrategy &getStrategy() { return *S; } /// Called before the assembly for the module is generated by /// the AsmPrinter (but after target specific hooks.) virtual void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {} + /// Called after the assembly for the module is generated by /// the AsmPrinter (but before target specific hooks) virtual void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {} - - virtual ~GCMetadataPrinter(); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GCMETADATAPRINTER_H diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h index 5b1fafea25b5..16168e785f81 100644 --- a/include/llvm/CodeGen/GCStrategy.h +++ b/include/llvm/CodeGen/GCStrategy.h @@ -174,7 +174,7 @@ public: /// Note that to use a custom GCMetadataPrinter w/gc.roots, you must also /// register your GCMetadataPrinter subclass with the /// GCMetadataPrinterRegistery as well. -typedef Registry GCRegistry; +using GCRegistry = Registry; } // end namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 45f25f96ec1f..1a865c3f0dce 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -17,8 +17,8 @@ #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #include "llvm/ADT/Optional.h" -#include #include +#include #include namespace llvm { diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 8fecafdc08d0..3148e70b56f8 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -22,8 +22,8 @@ #define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { // Forward declarations. diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 6b662a7f7413..db72f78c8321 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -16,9 +16,9 @@ #include "llvm/CodeGen/GlobalISel/Types.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h index 6c35832f963c..79fa12ec2fbb 100644 --- a/include/llvm/CodeGen/LexicalScopes.h +++ b/include/llvm/CodeGen/LexicalScopes.h @@ -31,12 +31,13 @@ namespace llvm { class MachineBasicBlock; class MachineFunction; class MachineInstr; +class MDNode; //===----------------------------------------------------------------------===// /// InsnRange - This is used to track range of instructions with identical /// lexical scope. /// -typedef std::pair InsnRange; +using InsnRange = std::pair; //===----------------------------------------------------------------------===// /// LexicalScope - This class is used to track scope information. diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 40cd146f88f8..f4fa872c7f5b 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -23,9 +23,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntEqClasses.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index 5de76c8b87bf..fa1ec867ea3d 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -16,9 +16,9 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include namespace llvm { diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h index dd0780397f42..b631a8c0122a 100644 --- a/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H #define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H -#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include @@ -27,29 +26,30 @@ namespace llvm { class StringRef; class MIRParserImpl; +class MachineModuleInfo; class SMDiagnostic; /// This class initializes machine functions by applying the state loaded from /// a MIR file. -class MIRParser : public MachineFunctionInitializer { +class MIRParser { std::unique_ptr Impl; public: MIRParser(std::unique_ptr Impl); MIRParser(const MIRParser &) = delete; - ~MIRParser() override; + ~MIRParser(); - /// Parse the optional LLVM IR module that's embedded in the MIR file. + /// Parses the optional LLVM IR module in the MIR file. /// /// A new, empty module is created if the LLVM IR isn't present. - /// Returns null if a parsing error occurred. - std::unique_ptr parseLLVMModule(); + /// \returns nullptr if a parsing error occurred. + std::unique_ptr parseIRModule(); - /// Initialize the machine function to the state that's described in the MIR - /// file. + /// \brief Parses MachineFunctions in the MIR file and add them to the given + /// MachineModuleInfo \p MMI. /// - /// Return true if error occurred. - bool initializeMachineFunction(MachineFunction &MF) override; + /// \returns true if an error occurred. + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); }; /// This function is the main interface to the MIR serialization format parser. diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 30e88fe38ac3..1b1ba6a05837 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -72,6 +72,9 @@ template <> struct ScalarTraits { struct BlockStringValue { StringValue Value; + bool operator==(const BlockStringValue &Other) const { + return Value == Other.Value; + } }; template <> struct BlockScalarTraits { @@ -146,6 +149,10 @@ struct VirtualRegisterDefinition { StringValue Class; StringValue PreferredRegister; // TODO: Serialize the target specific register hints. + bool operator==(const VirtualRegisterDefinition &Other) const { + return ID == Other.ID && Class == Other.Class && + PreferredRegister == Other.PreferredRegister; + } }; template <> struct MappingTraits { @@ -162,6 +169,10 @@ template <> struct MappingTraits { struct MachineFunctionLiveIn { StringValue Register; StringValue VirtualRegister; + bool operator==(const MachineFunctionLiveIn &Other) const { + return Register == Other.Register && + VirtualRegister == Other.VirtualRegister; + } }; template <> struct MappingTraits { @@ -196,6 +207,14 @@ struct MachineStackObject { StringValue DebugVar; StringValue DebugExpr; StringValue DebugLoc; + bool operator==(const MachineStackObject &Other) const { + return ID == Other.ID && Name == Other.Name && Type == Other.Type && + Offset == Other.Offset && Size == Other.Size && + Alignment == Other.Alignment && + CalleeSavedRegister == Other.CalleeSavedRegister && + LocalOffset == Other.LocalOffset && DebugVar == Other.DebugVar && + DebugExpr == Other.DebugExpr && DebugLoc == Other.DebugLoc; + } }; template <> struct ScalarEnumerationTraits { @@ -214,13 +233,13 @@ template <> struct MappingTraits { YamlIO.mapOptional( "type", Object.Type, MachineStackObject::DefaultType); // Don't print the default type. - YamlIO.mapOptional("offset", Object.Offset); + YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("local-offset", Object.LocalOffset); + YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional()); YamlIO.mapOptional("di-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("di-expression", Object.DebugExpr, @@ -244,6 +263,12 @@ struct FixedMachineStackObject { bool IsImmutable = false; bool IsAliased = false; StringValue CalleeSavedRegister; + bool operator==(const FixedMachineStackObject &Other) const { + return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && + Size == Other.Size && Alignment == Other.Alignment && + IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && + CalleeSavedRegister == Other.CalleeSavedRegister; + } }; template <> @@ -261,12 +286,12 @@ template <> struct MappingTraits { YamlIO.mapOptional( "type", Object.Type, FixedMachineStackObject::DefaultType); // Don't print the default type. - YamlIO.mapOptional("offset", Object.Offset); - YamlIO.mapOptional("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); + YamlIO.mapOptional("size", Object.Size, (uint64_t)0); + YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); if (Object.Type != FixedMachineStackObject::SpillSlot) { - YamlIO.mapOptional("isImmutable", Object.IsImmutable); - YamlIO.mapOptional("isAliased", Object.IsAliased); + YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); + YamlIO.mapOptional("isAliased", Object.IsAliased, false); } YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. @@ -279,13 +304,17 @@ struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; unsigned Alignment = 0; + bool operator==(const MachineConstantPoolValue &Other) const { + return ID == Other.ID && Value == Other.Value && + Alignment == Other.Alignment; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { YamlIO.mapRequired("id", Constant.ID); - YamlIO.mapOptional("value", Constant.Value); - YamlIO.mapOptional("alignment", Constant.Alignment); + YamlIO.mapOptional("value", Constant.Value, StringValue()); + YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0); } }; @@ -293,16 +322,22 @@ struct MachineJumpTable { struct Entry { UnsignedValue ID; std::vector Blocks; + bool operator==(const Entry &Other) const { + return ID == Other.ID && Blocks == Other.Blocks; + } }; MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32; std::vector Entries; + bool operator==(const MachineJumpTable &Other) const { + return Kind == Other.Kind && Entries == Other.Entries; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineJumpTable::Entry &Entry) { YamlIO.mapRequired("id", Entry.ID); - YamlIO.mapOptional("blocks", Entry.Blocks); + YamlIO.mapOptional("blocks", Entry.Blocks, std::vector()); } }; @@ -322,7 +357,8 @@ namespace yaml { template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineJumpTable &JT) { YamlIO.mapRequired("kind", JT.Kind); - YamlIO.mapOptional("entries", JT.Entries); + YamlIO.mapOptional("entries", JT.Entries, + std::vector()); } }; @@ -351,25 +387,43 @@ struct MachineFrameInfo { bool HasMustTailInVarArgFunc = false; StringValue SavePoint; StringValue RestorePoint; + bool operator==(const MachineFrameInfo &Other) const { + return IsFrameAddressTaken == Other.IsFrameAddressTaken && + IsReturnAddressTaken == Other.IsReturnAddressTaken && + HasStackMap == Other.HasStackMap && + HasPatchPoint == Other.HasPatchPoint && + StackSize == Other.StackSize && + OffsetAdjustment == Other.OffsetAdjustment && + MaxAlignment == Other.MaxAlignment && + AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls && + StackProtector == Other.StackProtector && + MaxCallFrameSize == Other.MaxCallFrameSize && + HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment && + HasVAStart == Other.HasVAStart && + HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc && + SavePoint == Other.SavePoint && RestorePoint == Other.RestorePoint; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineFrameInfo &MFI) { - YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken); - YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken); - YamlIO.mapOptional("hasStackMap", MFI.HasStackMap); - YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint); - YamlIO.mapOptional("stackSize", MFI.StackSize); - YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment); - YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment); - YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack); - YamlIO.mapOptional("hasCalls", MFI.HasCalls); + YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken, false); + YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken, false); + YamlIO.mapOptional("hasStackMap", MFI.HasStackMap, false); + YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint, false); + YamlIO.mapOptional("stackSize", MFI.StackSize, (uint64_t)0); + YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment, (int)0); + YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment, (unsigned)0); + YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack, false); + YamlIO.mapOptional("hasCalls", MFI.HasCalls, false); YamlIO.mapOptional("stackProtector", MFI.StackProtector, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, ~0u); - YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment); - YamlIO.mapOptional("hasVAStart", MFI.HasVAStart); - YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc); + YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0); + YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment, + false); + YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false); + YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc, + false); YamlIO.mapOptional("savePoint", MFI.SavePoint, StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("restorePoint", MFI.RestorePoint, @@ -403,22 +457,28 @@ struct MachineFunction { template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineFunction &MF) { YamlIO.mapRequired("name", MF.Name); - YamlIO.mapOptional("alignment", MF.Alignment); - YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice); - YamlIO.mapOptional("legalized", MF.Legalized); - YamlIO.mapOptional("regBankSelected", MF.RegBankSelected); - YamlIO.mapOptional("selected", MF.Selected); - YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness); - YamlIO.mapOptional("registers", MF.VirtualRegisters); - YamlIO.mapOptional("liveins", MF.LiveIns); - YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters); - YamlIO.mapOptional("frameInfo", MF.FrameInfo); - YamlIO.mapOptional("fixedStack", MF.FixedStackObjects); - YamlIO.mapOptional("stack", MF.StackObjects); - YamlIO.mapOptional("constants", MF.Constants); + YamlIO.mapOptional("alignment", MF.Alignment, (unsigned)0); + YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false); + YamlIO.mapOptional("legalized", MF.Legalized, false); + YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); + YamlIO.mapOptional("selected", MF.Selected, false); + YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); + YamlIO.mapOptional("registers", MF.VirtualRegisters, + std::vector()); + YamlIO.mapOptional("liveins", MF.LiveIns, + std::vector()); + YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters, + Optional>()); + YamlIO.mapOptional("frameInfo", MF.FrameInfo, MachineFrameInfo()); + YamlIO.mapOptional("fixedStack", MF.FixedStackObjects, + std::vector()); + YamlIO.mapOptional("stack", MF.StackObjects, + std::vector()); + YamlIO.mapOptional("constants", MF.Constants, + std::vector()); if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty()) - YamlIO.mapOptional("jumpTable", MF.JumpTableInfo); - YamlIO.mapOptional("body", MF.Body); + YamlIO.mapOptional("jumpTable", MF.JumpTableInfo, MachineJumpTable()); + YamlIO.mapOptional("body", MF.Body, BlockStringValue()); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 26ed8bb487a2..051908c40df7 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -19,12 +19,12 @@ #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" #include "llvm/ADT/simple_ilist.h" -#include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include #include #include diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 10125864cd90..f67da7b01c54 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -22,11 +22,11 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/include/llvm/CodeGen/MachineFunctionInitializer.h b/include/llvm/CodeGen/MachineFunctionInitializer.h deleted file mode 100644 index 0fbcb480b1ab..000000000000 --- a/include/llvm/CodeGen/MachineFunctionInitializer.h +++ /dev/null @@ -1,38 +0,0 @@ -//=- MachineFunctionInitializer.h - machine function initializer --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares an interface that allows custom machine function -// initialization. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H -#define LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H - -namespace llvm { - -class MachineFunction; - -/// This interface provides a way to initialize machine functions after they are -/// created by the machine function analysis pass. -class MachineFunctionInitializer { - virtual void anchor(); - -public: - virtual ~MachineFunctionInitializer() = default; - - /// Initialize the machine function. - /// - /// Return true if error occurred. - virtual bool initializeMachineFunction(MachineFunction &MF) = 0; -}; - -} // end namespace llvm - -#endif // LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h index 653d1175d04b..6d978daa2018 100644 --- a/include/llvm/CodeGen/MachineFunctionPass.h +++ b/include/llvm/CodeGen/MachineFunctionPass.h @@ -19,8 +19,8 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H -#include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index a311124a35ba..4d83f27eac3c 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -21,7 +21,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Value.h" // PointerLikeTypeTraits +#include "llvm/IR/Value.h" // PointerLikeTypeTraits #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/DataTypes.h" diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index d4ac58c3bd22..d64941a9e725 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -47,7 +47,6 @@ class BasicBlock; class CallInst; class Function; class MachineFunction; -class MachineFunctionInitializer; class MMIAddrLabelMap; class Module; class TargetMachine; @@ -126,7 +125,6 @@ class MachineModuleInfo : public ImmutablePass { /// comments in lib/Target/X86/X86FrameLowering.cpp for more details. bool UsesMorestackAddr; - MachineFunctionInitializer *MFInitializer; /// Maps IR Functions to their corresponding MachineFunctions. DenseMap> MachineFunctions; /// Next unique number available for a MachineFunction. @@ -150,14 +148,13 @@ public: void setModule(const Module *M) { TheModule = M; } const Module *getModule() const { return TheModule; } - void setMachineFunctionInitializer(MachineFunctionInitializer *MFInit) { - MFInitializer = MFInit; - } - /// Returns the MachineFunction constructed for the IR function \p F. - /// Creates a new MachineFunction and runs the MachineFunctionInitializer - /// if none exists yet. - MachineFunction &getMachineFunction(const Function &F); + /// Creates a new MachineFunction if none exists yet. + MachineFunction &getOrCreateMachineFunction(const Function &F); + + /// \bried Returns the MachineFunction associated to IR function \p F if there + /// is one, otherwise nullptr. + MachineFunction *getMachineFunction(const Function &F) const; /// Delete the MachineFunction \p MF and reset the link in the IR Function to /// Machine Function map. diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index f28a79c5b5cc..61cff3890b75 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H #define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Wasm.h" +#include "llvm/CodeGen/ValueTypes.h" namespace llvm { class MCSymbol; diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index e16354088296..2560399bcf54 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -14,8 +14,8 @@ #ifndef LLVM_CODEGEN_MACHINEOPERAND_H #define LLVM_CODEGEN_MACHINEOPERAND_H -#include "llvm/Support/DataTypes.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/DataTypes.h" #include namespace llvm { diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index c027783aae55..8347f00cbc7a 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -18,9 +18,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -30,13 +30,13 @@ #include "llvm/MC/LaneBitmask.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include #include #include #include #include #include #include +#include namespace llvm { diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h index 5c7e9999cc9a..481747dc163e 100644 --- a/include/llvm/CodeGen/RegAllocRegistry.h +++ b/include/llvm/CodeGen/RegAllocRegistry.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/RegAllocRegistry.h -------------------------*- C++ -*-===// +//===- llvm/CodeGen/RegAllocRegistry.h --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,16 +19,16 @@ namespace llvm { +class FunctionPass; + //===----------------------------------------------------------------------===// /// /// RegisterRegAlloc class - Track the registration of register allocators. /// //===----------------------------------------------------------------------===// class RegisterRegAlloc : public MachinePassRegistryNode { - public: - - typedef FunctionPass *(*FunctionPassCtor)(); + using FunctionPassCtor = FunctionPass *(*)(); static MachinePassRegistry Registry; @@ -36,22 +36,26 @@ public: : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { Registry.Add(this); } + ~RegisterRegAlloc() { Registry.Remove(this); } // Accessors. - // RegisterRegAlloc *getNext() const { return (RegisterRegAlloc *)MachinePassRegistryNode::getNext(); } + static RegisterRegAlloc *getList() { return (RegisterRegAlloc *)Registry.getList(); } + static FunctionPassCtor getDefault() { return (FunctionPassCtor)Registry.getDefault(); } + static void setDefault(FunctionPassCtor C) { Registry.setDefault((MachinePassCtor)C); } + static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } @@ -59,5 +63,4 @@ public: } // end namespace llvm - -#endif +#endif // LLVM_CODEGEN_REGALLOCREGISTRY_H diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index a3ea41d5236e..e997aaf269e3 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -32,7 +32,9 @@ namespace llvm { class LiveIntervals; +class MachineFunction; class MachineInstr; +class MachineRegisterInfo; class RegisterClassInfo; struct RegisterMaskPair { @@ -147,12 +149,14 @@ class PressureDiff { PressureChange PressureChanges[MaxPSets]; - typedef PressureChange* iterator; + using iterator = PressureChange *; + iterator nonconst_begin() { return &PressureChanges[0]; } iterator nonconst_end() { return &PressureChanges[MaxPSets]; } public: - typedef const PressureChange* const_iterator; + using const_iterator = const PressureChange *; + const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } @@ -269,7 +273,7 @@ private: } }; - typedef SparseSet RegSet; + using RegSet = SparseSet; RegSet Regs; unsigned NumRegUnits; diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index 3f88032cb638..0a04bc6a89f4 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -1,4 +1,4 @@ -//==- RegisterUsageInfo.h - Register Usage Informartion Storage -*- C++ -*-===// +//==- RegisterUsageInfo.h - Register Usage Informartion Storage --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -20,15 +20,15 @@ #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" +#include +#include namespace llvm { +class Function; +class TargetMachine; + class PhysicalRegisterUsageInfo : public ImmutablePass { virtual void anchor(); @@ -70,6 +70,7 @@ private: const TargetMachine *TM; }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 97aa2aace822..4d72eda5c71a 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -18,9 +18,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index d62bb9bf0b75..218e22e40234 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -17,10 +17,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseMultiSet.h" #include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h index c2013661cfff..d6a8c791392c 100644 --- a/include/llvm/CodeGen/ScheduleDFS.h +++ b/include/llvm/CodeGen/ScheduleDFS.h @@ -17,9 +17,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include #include #include +#include namespace llvm { diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h index a7a6227664de..badf927d0e95 100644 --- a/include/llvm/CodeGen/SchedulerRegistry.h +++ b/include/llvm/CodeGen/SchedulerRegistry.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/SchedulerRegistry.h ------------------------*- C++ -*-===// +//===- llvm/CodeGen/SchedulerRegistry.h -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,7 @@ #define LLVM_CODEGEN_SCHEDULERREGISTRY_H #include "llvm/CodeGen/MachinePassRegistry.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CodeGen.h" namespace llvm { @@ -26,15 +26,13 @@ namespace llvm { /// //===----------------------------------------------------------------------===// -class SelectionDAGISel; class ScheduleDAGSDNodes; -class SelectionDAG; -class MachineBasicBlock; +class SelectionDAGISel; class RegisterScheduler : public MachinePassRegistryNode { public: - typedef ScheduleDAGSDNodes *(*FunctionPassCtor)(SelectionDAGISel*, - CodeGenOpt::Level); + using FunctionPassCtor = ScheduleDAGSDNodes *(*)(SelectionDAGISel*, + CodeGenOpt::Level); static MachinePassRegistry Registry; @@ -45,13 +43,14 @@ public: // Accessors. - // RegisterScheduler *getNext() const { return (RegisterScheduler *)MachinePassRegistryNode::getNext(); } + static RegisterScheduler *getList() { return (RegisterScheduler *)Registry.getList(); } + static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } @@ -103,4 +102,4 @@ ScheduleDAGSDNodes *createDAGLinearizer(SelectionDAGISel *IS, } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SCHEDULERREGISTRY_H diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 4b1a375abd57..2ef7796a4a07 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -21,12 +21,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -1229,39 +1229,6 @@ public: AllNodes.insert(Position, AllNodes.remove(N)); } - /// Returns true if the opcode is a commutative binary operation. - static bool isCommutativeBinOp(unsigned Opcode) { - // FIXME: This should get its info from the td file, so that we can include - // target info. - switch (Opcode) { - case ISD::ADD: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: - case ISD::MUL: - case ISD::MULHU: - case ISD::MULHS: - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: - case ISD::FADD: - case ISD::FMUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::SADDO: - case ISD::UADDO: - case ISD::ADDC: - case ISD::ADDE: - case ISD::FMINNUM: - case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: - return true; - default: return false; - } - } - /// Returns an APFloat semantics tag appropriate for the given type. If VT is /// a vector type, the element semantics are returned. static const fltSemantics &EVTToAPFloatSemantics(EVT VT) { diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 3a4feb322092..0cd26d35a482 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -24,11 +24,11 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineValueType.h" diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index a275b2721b44..a7b16e7a9ed2 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -20,10 +20,10 @@ #define LLVM_CODEGEN_SLOTINDEXES_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/ilist.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index b970de71f862..72de212d0df9 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -19,18 +19,20 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" namespace llvm { +class BasicBlock; +class DominatorTree; class Function; +class Instruction; class Module; -class PHINode; +class TargetLoweringBase; +class TargetMachine; +class Type; class StackProtector : public FunctionPass { public: @@ -48,7 +50,7 @@ public: }; /// A mapping of AllocaInsts to their required SSP layout. - typedef ValueMap SSPLayoutMap; + using SSPLayoutMap = ValueMap; private: const TargetMachine *TM = nullptr; @@ -119,10 +121,7 @@ public: initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override; SSPLayoutKind getSSPLayout(const AllocaInst *AI) const; diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h index b667245fd3c0..483c0ab1eec9 100644 --- a/include/llvm/CodeGen/TailDuplicator.h +++ b/include/llvm/CodeGen/TailDuplicator.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/TailDuplicator.h ---------------------------*- C++ -*-===// +//===- llvm/CodeGen/TailDuplicator.h ----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,19 +15,27 @@ #ifndef LLVM_CODEGEN_TAILDUPLICATOR_H #define LLVM_CODEGEN_TAILDUPLICATOR_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include namespace llvm { -extern cl::opt TailDupIndirectBranchSize; +class MachineBasicBlock; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineInstr; +class MachineModuleInfo; +class MachineRegisterInfo; +class TargetRegisterInfo; /// Utility class to perform tail duplication. class TailDuplicator { @@ -46,7 +54,7 @@ class TailDuplicator { // For each virtual register in SSAUpdateVals keep a list of source virtual // registers. - typedef std::vector> AvailableValsTy; + using AvailableValsTy = std::vector>; DenseMap SSAUpdateVals; @@ -62,11 +70,14 @@ public: void initMF(MachineFunction &MF, const MachineBranchProbabilityInfo *MBPI, bool LayoutMode, unsigned TailDupSize = 0); + bool tailDuplicateBlocks(); static bool isSimpleBB(MachineBasicBlock *TailBB); bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB); + /// Returns true if TailBB can successfully be duplicated into PredBB bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB); + /// Tail duplicate a single basic block into its predecessors, and then clean /// up. /// If \p DuplicatePreds is not null, it will be updated to contain the list @@ -77,10 +88,10 @@ public: bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds = nullptr, - llvm::function_ref *RemovalCallback = nullptr); + function_ref *RemovalCallback = nullptr); private: - typedef TargetInstrInfo::RegSubRegPair RegSubRegPair; + using RegSubRegPair = TargetInstrInfo::RegSubRegPair; void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, MachineBasicBlock *BB); @@ -112,9 +123,9 @@ private: void removeDeadBlock( MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback = nullptr); + function_ref *RemovalCallback = nullptr); }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_TAILDUPLICATOR_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index adf2b3ea1c9b..106a084a95c0 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -41,6 +41,11 @@ public: TargetLoweringObjectFileELF() = default; ~TargetLoweringObjectFileELF() override = default; + /// Emit Obj-C garbage collection and linker options. + void emitModuleFlags(MCStreamer &Streamer, + ArrayRef ModuleFlags, + const TargetMachine &TM) const override; + void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const override; @@ -149,8 +154,7 @@ public: MCSection *getSectionForJumpTable(const Function &F, const TargetMachine &TM) const override; - /// Emit Obj-C garbage collection and linker options. Only linker option - /// emission is implemented for COFF. + /// Emit Obj-C garbage collection and linker options. void emitModuleFlags(MCStreamer &Streamer, ArrayRef ModuleFlags, const TargetMachine &TM) const override; diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index fcf1937c186e..c109b7489cca 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -119,6 +119,10 @@ protected: /// callers. bool RequireCodeGenSCCOrder; + /// Add the actual instruction selection passes. This does not include + /// preparation passes on IR. + bool addCoreISelPasses(); + public: TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm); // Dummy constructor. @@ -206,6 +210,13 @@ public: /// has not be overriden on the command line with '-regalloc=...' bool usingDefaultRegAlloc() const; + /// High level function that adds all passes necessary to go from llvm IR + /// representation to the MI representation. + /// Adds IR based lowering and target specific optimization passes and finally + /// the core instruction selection passes. + /// \returns true if an error occured, false otherwise. + bool addISelPasses(); + /// Add common target configurable passes that perform LLVM IR to IR /// transforms following machine independent optimization. virtual void addIRPasses(); diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 4365fca74bf1..f23667976468 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -16,6 +16,7 @@ #ifndef LLVM_CODEGEN_TARGETSCHEDULE_H #define LLVM_CODEGEN_TARGETSCHEDULE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -123,7 +124,7 @@ public: } #endif - typedef const MCWriteProcResEntry *ProcResIter; + using ProcResIter = const MCWriteProcResEntry *; // \brief Get an iterator into the processor resources consumed by this // scheduling class. diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h index d7e92094877d..b9076353fd07 100644 --- a/include/llvm/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -102,14 +102,7 @@ namespace llvm { /// @brief creates a mapping for the specified virtual register to /// the specified physical register - void assignVirt2Phys(unsigned virtReg, unsigned physReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg) && - TargetRegisterInfo::isPhysicalRegister(physReg)); - assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && - "attempt to assign physical register to already mapped " - "virtual register"); - Virt2PhysMap[virtReg] = physReg; - } + void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg); /// @brief clears the specified virtual register's, physical /// register mapping diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake index 4ce487b8f5f3..7ae401e5b8a8 100644 --- a/include/llvm/Config/abi-breaking.h.cmake +++ b/include/llvm/Config/abi-breaking.h.cmake @@ -15,6 +15,9 @@ /* Define to enable checks that alter the LLVM C++ ABI */ #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS +/* Define to enable reverse iteration of unordered llvm containers */ +#cmakedefine01 LLVM_ENABLE_REVERSE_ITERATION + /* Allow selectively disabling link-time mismatch checking so that header-only ADT content from LLVM can be used without linking libSupport. */ #if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index a64e208fa784..8df15ef97026 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -350,27 +350,6 @@ /* Host triple LLVM will be executed on */ #cmakedefine LLVM_HOST_TRIPLE "${LLVM_HOST_TRIPLE}" -/* LLVM architecture name for the native architecture, if available */ -#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH} - -/* LLVM name for the native AsmParser init function, if available */ -#cmakedefine LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser - -/* LLVM name for the native AsmPrinter init function, if available */ -#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter - -/* LLVM name for the native Disassembler init function, if available */ -#cmakedefine LLVM_NATIVE_DISASSEMBLER LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler - -/* LLVM name for the native Target init function, if available */ -#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target - -/* LLVM name for the native TargetInfo init function, if available */ -#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo - -/* LLVM name for the native target MC init function, if available */ -#cmakedefine LLVM_NATIVE_TARGETMC LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC - /* Define if this is Unixish platform */ #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX} diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h index 68ad09982202..4c6bbedc6bbd 100644 --- a/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -62,10 +62,8 @@ template struct RemappedRecord { template struct VarStreamArrayExtractor> { - typedef void ContextType; - - static Error extract(BinaryStreamRef Stream, uint32_t &Len, - codeview::CVRecord &Item) { + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::CVRecord &Item) { using namespace codeview; const RecordPrefix *Prefix = nullptr; BinaryStreamReader Reader(Stream); diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h index 9890263ae2d2..251c9d1ae62c 100644 --- a/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/include/llvm/DebugInfo/CodeView/CodeView.h @@ -575,6 +575,24 @@ struct FrameData { }; }; +// Corresponds to LocalIdAndGlobalIdPair structure. +// This structure information allows cross-referencing between PDBs. For +// example, when a PDB is being built during compilation it is not yet known +// what other modules may end up in the PDB at link time. So certain types of +// IDs may clash between the various compile time PDBs. For each affected +// module, a subsection would be put into the PDB containing a mapping from its +// local IDs to a single ID namespace for all items in the PDB file. +struct CrossModuleExport { + support::ulittle32_t Local; + support::ulittle32_t Global; +}; + +struct CrossModuleImport { + support::ulittle32_t ModuleNameOffset; + support::ulittle32_t Count; // Number of elements + // support::ulittle32_t ids[Count]; // id from referenced module +}; + enum class CodeViewContainer { ObjectFile, Pdb }; inline uint32_t alignOf(CodeViewContainer Container) { diff --git a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h index c958a95ee6de..9fc90f13d347 100644 --- a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h +++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h @@ -36,8 +36,8 @@ template <> struct VarStreamArrayExtractor { public: typedef void ContextType; - static Error extract(BinaryStreamRef Stream, uint32_t &Len, - codeview::FileChecksumEntry &Item); + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::FileChecksumEntry &Item); }; } diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h new file mode 100644 index 000000000000..f755b23422c7 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h @@ -0,0 +1,64 @@ +//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +#include + +namespace llvm { +namespace codeview { +class DebugCrossModuleExportsSubsectionRef final : public DebugSubsectionRef { + typedef FixedStreamArray ReferenceArray; + typedef ReferenceArray::Iterator Iterator; + +public: + DebugCrossModuleExportsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CrossScopeExports) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CrossScopeExports; + } + + Error initialize(BinaryStreamReader Reader); + Error initialize(BinaryStreamRef Stream); + + Iterator begin() const { return References.begin(); } + Iterator end() const { return References.end(); } + +private: + FixedStreamArray References; +}; + +class DebugCrossModuleExportsSubsection final : public DebugSubsection { +public: + DebugCrossModuleExportsSubsection() + : DebugSubsection(DebugSubsectionKind::CrossScopeExports) {} + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CrossScopeExports; + } + + void addMapping(uint32_t Local, uint32_t Global); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + +private: + std::map Mappings; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h new file mode 100644 index 000000000000..ea3a9a43d50b --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h @@ -0,0 +1,88 @@ +//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace codeview { + +struct CrossModuleImportItem { + const CrossModuleImport *Header = nullptr; + llvm::FixedStreamArray Imports; +}; +} +} + +namespace llvm { +template <> struct VarStreamArrayExtractor { +public: + typedef void ContextType; + + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::CrossModuleImportItem &Item); +}; +} + +namespace llvm { +namespace codeview { +class DebugStringTableSubsection; + +class DebugCrossModuleImportsSubsectionRef final : public DebugSubsectionRef { + typedef VarStreamArray ReferenceArray; + typedef ReferenceArray::Iterator Iterator; + +public: + DebugCrossModuleImportsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CrossScopeImports) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CrossScopeImports; + } + + Error initialize(BinaryStreamReader Reader); + Error initialize(BinaryStreamRef Stream); + + Iterator begin() const { return References.begin(); } + Iterator end() const { return References.end(); } + +private: + ReferenceArray References; +}; + +class DebugCrossModuleImportsSubsection final : public DebugSubsection { +public: + explicit DebugCrossModuleImportsSubsection( + DebugStringTableSubsection &Strings) + : DebugSubsection(DebugSubsectionKind::CrossScopeImports), + Strings(Strings) {} + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CrossScopeImports; + } + + void addImport(StringRef Module, uint32_t ImportId); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + +private: + DebugStringTableSubsection &Strings; + StringMap> Mappings; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h index 60440700c265..c9b062717baa 100644 --- a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h +++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h @@ -43,10 +43,9 @@ struct InlineeSourceLine { } template <> struct VarStreamArrayExtractor { - typedef bool ContextType; - - static Error extract(BinaryStreamRef Stream, uint32_t &Len, - codeview::InlineeSourceLine &Item, bool HasExtraFiles); + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::InlineeSourceLine &Item); + bool HasExtraFiles = false; }; namespace codeview { diff --git a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h index 1b63af59c2ed..f1feb1336cc5 100644 --- a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h +++ b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h @@ -64,10 +64,10 @@ struct LineColumnEntry { class LineColumnExtractor { public: - typedef const LineFragmentHeader *ContextType; + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + LineColumnEntry &Item); - static Error extract(BinaryStreamRef Stream, uint32_t &Len, - LineColumnEntry &Item, const LineFragmentHeader *Ctx); + const LineFragmentHeader *Header = nullptr; }; class DebugLinesSubsectionRef final : public DebugSubsectionRef { @@ -122,7 +122,7 @@ public: uint32_t calculateSerializedSize() const override; Error commit(BinaryStreamWriter &Writer) const override; - void setRelocationAddress(uint16_t Segment, uint16_t Offset); + void setRelocationAddress(uint16_t Segment, uint32_t Offset); void setCodeSize(uint32_t Size); void setFlags(LineFlags Flags); @@ -131,7 +131,7 @@ public: private: DebugChecksumsSubsection &Checksums; - uint16_t RelocOffset = 0; + uint32_t RelocOffset = 0; uint16_t RelocSegment = 0; uint32_t CodeSize = 0; LineFlags Flags = LF_None; diff --git a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h index fbe39cb16f09..be0a2344965b 100644 --- a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h +++ b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h @@ -39,11 +39,14 @@ public: } Error initialize(BinaryStreamRef Contents); + Error initialize(BinaryStreamReader &Reader); Expected getString(uint32_t Offset) const; bool valid() const { return Stream.valid(); } + BinaryStreamRef getBuffer() const { return Stream; } + private: BinaryStreamRef Stream; }; diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h index 847259c5ceac..49a269d92e35 100644 --- a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h +++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h @@ -52,7 +52,7 @@ public: DebugSubsectionRecordBuilder(std::unique_ptr Subsection, CodeViewContainer Container); uint32_t calculateSerializedLength(); - Error commit(BinaryStreamWriter &Writer); + Error commit(BinaryStreamWriter &Writer) const; private: std::unique_ptr Subsection; @@ -62,18 +62,12 @@ private: } // namespace codeview template <> struct VarStreamArrayExtractor { - typedef void ContextType; - - static Error extract(BinaryStreamRef Stream, uint32_t &Length, - codeview::DebugSubsectionRecord &Info) { - // FIXME: We need to pass the container type through to this function, but - // VarStreamArray doesn't easily support stateful contexts. In practice - // this isn't super important since the subsection header describes its - // length and we can just skip it. It's more important when writing. + Error operator()(BinaryStreamRef Stream, uint32_t &Length, + codeview::DebugSubsectionRecord &Info) { if (auto EC = codeview::DebugSubsectionRecord::initialize( Stream, Info, codeview::CodeViewContainer::Pdb)) return EC; - Length = Info.getRecordLength(); + Length = alignTo(Info.getRecordLength(), 4); return Error::success(); } }; diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h index 55bef491c97e..d4a3d9195a36 100644 --- a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h +++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h @@ -10,6 +10,8 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H #define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/Support/Error.h" #include @@ -20,9 +22,65 @@ namespace codeview { class DebugChecksumsSubsectionRef; class DebugSubsectionRecord; class DebugInlineeLinesSubsectionRef; +class DebugCrossModuleExportsSubsectionRef; +class DebugCrossModuleImportsSubsectionRef; +class DebugFrameDataSubsectionRef; class DebugLinesSubsectionRef; +class DebugStringTableSubsectionRef; +class DebugSymbolRVASubsectionRef; +class DebugSymbolsSubsectionRef; class DebugUnknownSubsectionRef; +struct DebugSubsectionState { +public: + // If no subsections are known about initially, we find as much as we can. + DebugSubsectionState(); + + // If only a string table subsection is given, we find a checksums subsection. + explicit DebugSubsectionState(const DebugStringTableSubsectionRef &Strings); + + // If both subsections are given, we don't need to find anything. + DebugSubsectionState(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums); + + template void initialize(T &&FragmentRange) { + for (const DebugSubsectionRecord &R : FragmentRange) { + if (Strings && Checksums) + return; + if (R.kind() == DebugSubsectionKind::FileChecksums) { + initializeChecksums(R); + continue; + } + if (R.kind() == DebugSubsectionKind::StringTable && !Strings) { + // While in practice we should never encounter a string table even + // though the string table is already initialized, in theory it's + // possible. PDBs are supposed to have one global string table and + // then this subsection should not appear. Whereas object files are + // supposed to have this subsection appear exactly once. However, + // for testing purposes it's nice to be able to test this subsection + // independently of one format or the other, so for some tests we + // manually construct a PDB that contains this subsection in addition + // to a global string table. + initializeStrings(R); + continue; + } + } + } + + const DebugStringTableSubsectionRef &strings() const { return *Strings; } + const DebugChecksumsSubsectionRef &checksums() const { return *Checksums; } + +private: + void initializeStrings(const DebugSubsectionRecord &SR); + void initializeChecksums(const DebugSubsectionRecord &FCR); + + std::unique_ptr OwnedStrings; + std::unique_ptr OwnedChecksums; + + const DebugStringTableSubsectionRef *Strings = nullptr; + const DebugChecksumsSubsectionRef *Checksums = nullptr; +}; + class DebugSubsectionVisitor { public: virtual ~DebugSubsectionVisitor() = default; @@ -30,33 +88,71 @@ public: virtual Error visitUnknown(DebugUnknownSubsectionRef &Unknown) { return Error::success(); } - virtual Error visitLines(DebugLinesSubsectionRef &Lines) { - return Error::success(); - } + virtual Error visitLines(DebugLinesSubsectionRef &Lines, + const DebugSubsectionState &State) = 0; + virtual Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums, + const DebugSubsectionState &State) = 0; + virtual Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees, + const DebugSubsectionState &State) = 0; + virtual Error + visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &CSE, + const DebugSubsectionState &State) = 0; + virtual Error + visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &CSE, + const DebugSubsectionState &State) = 0; - virtual Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) { - return Error::success(); - } + virtual Error visitStringTable(DebugStringTableSubsectionRef &ST, + const DebugSubsectionState &State) = 0; - virtual Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) { - return Error::success(); - } + virtual Error visitSymbols(DebugSymbolsSubsectionRef &CSE, + const DebugSubsectionState &State) = 0; - virtual Error finished() { return Error::success(); } + virtual Error visitFrameData(DebugFrameDataSubsectionRef &FD, + const DebugSubsectionState &State) = 0; + virtual Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &RVAs, + const DebugSubsectionState &State) = 0; }; Error visitDebugSubsection(const DebugSubsectionRecord &R, - DebugSubsectionVisitor &V); + DebugSubsectionVisitor &V, + const DebugSubsectionState &State); + +namespace detail { +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + DebugSubsectionState &State) { + State.initialize(std::forward(FragmentRange)); + + for (const DebugSubsectionRecord &L : FragmentRange) { + if (auto EC = visitDebugSubsection(L, V, State)) + return EC; + } + return Error::success(); +} +} // namespace detail template Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V) { - for (const auto &L : FragmentRange) { - if (auto EC = visitDebugSubsection(L, V)) - return EC; - } - if (auto EC = V.finished()) - return EC; - return Error::success(); + DebugSubsectionState State; + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + const DebugStringTableSubsectionRef &Strings) { + DebugSubsectionState State(Strings); + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) { + DebugSubsectionState State(Strings, Checksums); + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); } } // end namespace codeview diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h new file mode 100644 index 000000000000..ad58a293cb09 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h @@ -0,0 +1,59 @@ +//===- DebugSymbolRVASubsection.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { + +class DebugSymbolRVASubsectionRef final : public DebugSubsectionRef { +public: + typedef FixedStreamArray ArrayType; + + DebugSymbolRVASubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CoffSymbolRVA; + } + + ArrayType::Iterator begin() const { return RVAs.begin(); } + ArrayType::Iterator end() const { return RVAs.end(); } + + Error initialize(BinaryStreamReader &Reader); + +private: + ArrayType RVAs; +}; + +class DebugSymbolRVASubsection final : public DebugSubsection { +public: + DebugSymbolRVASubsection(); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CoffSymbolRVA; + } + + Error commit(BinaryStreamWriter &Writer) const override; + uint32_t calculateSerializedSize() const override; + + void addRVA(uint32_t RVA) { RVAs.push_back(support::ulittle32_t(RVA)); } + +private: + std::vector RVAs; +}; +} // namespace codeview +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h index 3d1eb27ba270..dfda7deb6cb4 100644 --- a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h +++ b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h @@ -27,6 +27,9 @@ public: Error initialize(BinaryStreamReader Reader); + CVSymbolArray::Iterator begin() const { return Records.begin(); } + CVSymbolArray::Iterator end() const { return Records.end(); } + private: CVSymbolArray Records; }; diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h index 10d1c581a196..013e440613fc 100644 --- a/include/llvm/DebugInfo/CodeView/EnumTables.h +++ b/include/llvm/DebugInfo/CodeView/EnumTables.h @@ -11,8 +11,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_ENUMTABLES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ScopedPrinter.h" #include diff --git a/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/include/llvm/DebugInfo/CodeView/TypeSerializer.h index 1dee86a1da79..f785d4509547 100644 --- a/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ b/include/llvm/DebugInfo/CodeView/TypeSerializer.h @@ -86,6 +86,8 @@ public: void reset(); + BumpPtrAllocator &getAllocator() { return RecordStorage; } + ArrayRef> records() const; TypeIndex insertRecordBytes(ArrayRef &Record); TypeIndex insertRecord(const RemappedType &Record); diff --git a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h index 907ed1010e5b..1069dcd45334 100644 --- a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h +++ b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -37,8 +37,9 @@ private: TypeSerializer Serializer; public: - explicit TypeTableBuilder(BumpPtrAllocator &Allocator) - : Allocator(Allocator), Serializer(Allocator) {} + explicit TypeTableBuilder(BumpPtrAllocator &Allocator, + bool WriteUnique = true) + : Allocator(Allocator), Serializer(Allocator, WriteUnique) {} TypeTableBuilder(const TypeTableBuilder &) = delete; TypeTableBuilder &operator=(const TypeTableBuilder &) = delete; diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index 2ab1c9508522..2e82a774cc23 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -135,6 +135,7 @@ enum DIDumpType { DIDT_GnuPubnames, DIDT_GnuPubtypes, DIDT_Str, + DIDT_StrOffsets, DIDT_StrDwo, DIDT_StrOffsetsDwo, DIDT_AppleNames, @@ -152,6 +153,7 @@ struct DIDumpOptions { DIDumpType DumpType = DIDT_All; bool DumpEH = false; bool SummarizeTypes = false; + bool Brief = false; }; class DIContext { diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index 7324f6e3eb38..e363cff15803 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -10,11 +10,11 @@ #ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include #include #include diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index f95a013d7552..72793e97b60d 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -11,9 +11,9 @@ #define LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include #include diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h index c3953b62d780..f0672bb0ca75 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h @@ -10,8 +10,8 @@ #ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H #define LLVM_DEBUGINFO_DWARFATTRIBUTE_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/Support/Dwarf.h" #include namespace llvm { diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index 46c0b7f4ce60..b4e4721e3d51 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -19,8 +19,8 @@ class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, - StringRef SS, StringRef SOS, const DWARFSection *AOS, - StringRef LS, bool LE, bool IsDWO, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *Entry) : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, @@ -29,7 +29,7 @@ public: // VTable anchor. ~DWARFCompileUnit() override; - void dump(raw_ostream &OS); + void dump(raw_ostream &OS, DIDumpOptions DumpOpts); static const DWARFSectionKind Section = DW_SECT_INFO; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 519ecf618558..c72604a12bfd 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -11,12 +11,12 @@ #define LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" @@ -72,6 +72,9 @@ class DWARFContext : public DIContext { std::unique_ptr AbbrevDWO; std::unique_ptr LocDWO; + /// The maximum DWARF version of all units. + unsigned MaxVersion; + struct DWOFile { object::OwningBinary File; std::unique_ptr Context; @@ -97,7 +100,7 @@ class DWARFContext : public DIContext { void parseDWOTypeUnits(); public: - DWARFContext() : DIContext(CK_DWARF) {} + DWARFContext() : DIContext(CK_DWARF), MaxVersion(0) {} DWARFContext(DWARFContext &) = delete; DWARFContext &operator=(DWARFContext &) = delete; @@ -178,6 +181,13 @@ public: /// Get a DIE given an exact offset. DWARFDie getDIEForOffset(uint32_t Offset); + unsigned getMaxVersion() const { return MaxVersion; } + + void setMaxVersionIfGreater(unsigned Version) { + if (Version > MaxVersion) + MaxVersion = Version; + } + const DWARFUnitIndex &getCUIndex(); DWARFGdbIndex &getGdbIndex(); const DWARFUnitIndex &getTUIndex(); @@ -237,6 +247,11 @@ public: virtual StringRef getGnuPubNamesSection() = 0; virtual StringRef getGnuPubTypesSection() = 0; + /// DWARF v5 + /// @{ + virtual const DWARFSection &getStringOffsetSection() = 0; + /// @} + // Sections for DWARF5 split dwarf proposal. virtual const DWARFSection &getInfoDWOSection() = 0; virtual const TypeSectionMap &getTypesDWOSections() = 0; @@ -244,7 +259,7 @@ public: virtual const DWARFSection &getLineDWOSection() = 0; virtual const DWARFSection &getLocDWOSection() = 0; virtual StringRef getStringDWOSection() = 0; - virtual StringRef getStringOffsetDWOSection() = 0; + virtual const DWARFSection &getStringOffsetDWOSection() = 0; virtual const DWARFSection &getRangeDWOSection() = 0; virtual const DWARFSection &getAddrSection() = 0; virtual const DWARFSection& getAppleNamesSection() = 0; @@ -295,6 +310,11 @@ class DWARFContextInMemory : public DWARFContext { StringRef GnuPubNamesSection; StringRef GnuPubTypesSection; + /// DWARF v5 + /// @{ + DWARFSection StringOffsetSection; + /// @} + // Sections for DWARF5 split dwarf proposal. DWARFSection InfoDWOSection; TypeSectionMap TypesDWOSections; @@ -302,7 +322,7 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection LineDWOSection; DWARFSection LocDWOSection; StringRef StringDWOSection; - StringRef StringOffsetDWOSection; + DWARFSection StringOffsetDWOSection; DWARFSection RangeDWOSection; DWARFSection AddrSection; DWARFSection AppleNamesSection; @@ -353,6 +373,11 @@ public: const DWARFSection& getAppleNamespacesSection() override { return AppleNamespacesSection; } const DWARFSection& getAppleObjCSection() override { return AppleObjCSection; } + // DWARF v5 + const DWARFSection &getStringOffsetSection() override { + return StringOffsetSection; + } + // Sections for DWARF5 split dwarf proposal. const DWARFSection &getInfoDWOSection() override { return InfoDWOSection; } @@ -365,7 +390,7 @@ public: const DWARFSection &getLocDWOSection() override { return LocDWOSection; } StringRef getStringDWOSection() override { return StringDWOSection; } - StringRef getStringOffsetDWOSection() override { + const DWARFSection &getStringOffsetDWOSection() override { return StringOffsetDWOSection; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index fc2423a2708b..5c591b3de491 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -10,8 +10,8 @@ #ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" -#include "llvm/Support/Dwarf.h" #include namespace llvm { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h index 9d36bb7ad211..a309fd104f93 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h @@ -12,7 +12,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include #include diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index b436711ae6ed..437060bc8fec 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -10,8 +10,8 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H -#include "llvm/Support/DataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" +#include "llvm/Support/DataExtractor.h" #include #include diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h index fa41b9e293c0..b216491b615a 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -11,14 +11,14 @@ #define LLVM_DEBUGINFO_DWARFDIE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFAttribute.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" -#include "llvm/Support/Dwarf.h" #include #include #include @@ -120,7 +120,8 @@ public: /// \param recurseDepth the depth to recurse to when dumping this DIE and its /// children. /// \param indent the number of characters to indent each line that is output. - void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0) const; + void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0, + DIDumpOptions DumpOpts = DIDumpOptions()) const; /// Extract the specified attribute from this DIE. /// diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 3a781dde8929..d6a3b52f2fe1 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -13,8 +13,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include namespace llvm { diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index c77d946c070a..2041d40eb53a 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -31,7 +31,7 @@ private: public: DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, - StringRef SS, StringRef SOS, const DWARFSection *AOS, + StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *Entry) diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index d0f7bd0d623f..945b8999ff22 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -10,11 +10,12 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" @@ -24,14 +25,13 @@ #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include #include #include #include +#include #include #include -#include namespace llvm { @@ -57,8 +57,9 @@ protected: virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, - StringRef SS, StringRef SOS, const DWARFSection *AOS, - StringRef LS, bool isLittleEndian, bool isDWO) = 0; + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, StringRef LS, + bool isLittleEndian, bool isDWO) = 0; }; const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, @@ -89,7 +90,7 @@ public: private: void parseImpl(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, - StringRef SS, StringRef SOS, const DWARFSection *AOS, + StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO) override { if (Parsed) return; @@ -119,7 +120,8 @@ class DWARFUnit { uint32_t RangeSectionBase; StringRef LineSection; StringRef StringSection; - StringRef StringOffsetSection; + const DWARFSection &StringOffsetSection; + uint64_t StringOffsetSectionBase = 0; const DWARFSection *AddrOffsetSection; uint32_t AddrOffsetSectionBase; bool isLittleEndian; @@ -162,8 +164,8 @@ protected: public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS, - StringRef SOS, const DWARFSection *AOS, StringRef LS, bool LE, - bool IsDWO, const DWARFUnitSectionBase &UnitSection, + const DWARFSection &SOS, const DWARFSection *AOS, StringRef LS, + bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry = nullptr); virtual ~DWARFUnit(); @@ -172,7 +174,9 @@ public: StringRef getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } - StringRef getStringOffsetSection() const { return StringOffsetSection; } + const DWARFSection &getStringOffsetSection() const { + return StringOffsetSection; + } void setAddrOffsetSection(const DWARFSection *AOS, uint32_t Base) { AddrOffsetSection = AOS; @@ -189,7 +193,8 @@ public: bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const; // FIXME: Result should be uint64_t in DWARF64. - bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const; + bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const; + uint64_t getStringOffsetSectionRelocation(uint32_t Index) const; DataExtractor getDebugInfoExtractor() const { return DataExtractor(InfoSection.Data, isLittleEndian, AddrSize); @@ -200,6 +205,9 @@ public: } const RelocAddrMap *getRelocMap() const { return &InfoSection.Relocs; } + const RelocAddrMap &getStringOffsetsRelocMap() const { + return StringOffsetSection.Relocs; + } bool extract(DataExtractor debug_info, uint32_t* offset_ptr); diff --git a/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/include/llvm/DebugInfo/MSF/MappedBlockStream.h index 36dce393fc66..02f3cb09b004 100644 --- a/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -17,7 +17,6 @@ #include "llvm/DebugInfo/MSF/MSFStreamLayout.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStream.h" -#include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h index 941e16a35fac..ffae6645e94b 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_PDB_DIA_DIAENUMDEBUGSTREAMS_H #include "DIASupport.h" +#include "llvm/DebugInfo/PDB/IPDBDataStream.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" namespace llvm { diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h index 106b84cecfff..08f0de124ede 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" namespace llvm { namespace pdb { diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h index 6c00d6a5e29d..e69d18f5ba37 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBSourceFile.h" namespace llvm { namespace pdb { diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h index b206ff59a6a4..f779cd1f4be3 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" namespace llvm { namespace pdb { diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h index 7e77f5a3eef9..8200f51e3da9 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h @@ -56,9 +56,8 @@ private: } // end namespace pdb template <> struct VarStreamArrayExtractor { - typedef void ContextType; - static Error extract(BinaryStreamRef Stream, uint32_t &Length, - pdb::DbiModuleDescriptor &Info) { + Error operator()(BinaryStreamRef Stream, uint32_t &Length, + pdb::DbiModuleDescriptor &Info) { if (auto EC = pdb::DbiModuleDescriptor::initialize(Stream, Info)) return EC; Length = Info.getRecordLength(); diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h index dc35f8c72cd9..7123e88cd642 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -96,7 +96,8 @@ private: FixedStreamArray DbgStreams; - PdbRaw_DbiSecContribVer SectionContribVersion; + PdbRaw_DbiSecContribVer SectionContribVersion = + PdbRaw_DbiSecContribVer::DbiSecContribVer60; FixedStreamArray SectionContribs; FixedStreamArray SectionContribs2; FixedStreamArray SectionMap; diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h index b114b7afb0b0..9e883d2f99a7 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbol.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h @@ -89,6 +89,8 @@ public: template std::unique_ptr findOneChild() const { auto Enumerator(findAllChildren()); + if (!Enumerator) + return nullptr; return Enumerator->getNext(); } @@ -97,6 +99,8 @@ public: template std::unique_ptr> findAllChildren() const { auto BaseIter = RawSymbol->findChildren(T::Tag); + if (!BaseIter) + return nullptr; return llvm::make_unique>(std::move(BaseIter)); } std::unique_ptr findAllChildren(PDB_SymType Type) const; diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index f68337c43271..1586f7b80669 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -27,10 +27,10 @@ #include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include #include #include #include -#include namespace llvm { diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 1bb911d09cfb..2fccf8a0f625 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -20,9 +20,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index a32278b8a81e..71d847c06264 100644 --- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -14,8 +14,8 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H #define LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index f16dd021ea51..f81d054440fc 100644 --- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -14,8 +14,8 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H #define LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H -#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/Object/ObjectFile.h" #include diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index 5b3426afe584..aabb44eef99d 100644 --- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -23,8 +23,8 @@ #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" -#include #include +#include #include #include #include diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h index 74535fe948ff..a9778514b9f1 100644 --- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h +++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h @@ -14,10 +14,10 @@ #ifndef LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H #define LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H +#include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Support/CBindingWrapping.h" -#include "llvm-c/ExecutionEngine.h" #include #include #include diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 687863857698..0e8adda82cbe 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -16,13 +16,13 @@ #ifndef LLVM_IR_ATTRIBUTES_H #define LLVM_IR_ATTRIBUTES_H +#include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/PointerLikeTypeTraits.h" -#include "llvm-c/Types.h" #include #include #include diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h index 235cb57cfd09..23f838b640e0 100644 --- a/include/llvm/IR/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -14,15 +14,15 @@ #ifndef LLVM_IR_BASICBLOCK_H #define LLVM_IR_BASICBLOCK_H +#include "llvm-c/Types.h" +#include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/Twine.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Value.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" -#include "llvm-c/Types.h" #include #include diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index 4a806ab501e5..96fbebf42c38 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -26,9 +26,9 @@ #ifndef LLVM_IR_CALLSITE_H #define LLVM_IR_CALLSITE_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" @@ -36,10 +36,10 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/Casting.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include #include #include diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h index 40a8d1eb27d0..bb5e1931393b 100644 --- a/include/llvm/IR/Constants.h +++ b/include/llvm/IR/Constants.h @@ -194,7 +194,7 @@ public: /// common code. It also correctly performs the comparison without the /// potential for an assertion from getZExtValue(). bool isZero() const { - return Val == 0; + return Val.isNullValue(); } /// This is just a convenience method to make client code smaller for a @@ -202,7 +202,7 @@ public: /// potential for an assertion from getZExtValue(). /// @brief Determine if the value is one. bool isOne() const { - return Val == 1; + return Val.isOneValue(); } /// This function will return true iff every bit in this constant is set @@ -243,7 +243,7 @@ public: /// @returns true iff this constant is greater or equal to the given number. /// @brief Determine if the value is greater or equal to the given number. bool uge(uint64_t Num) const { - return Val.getActiveBits() > 64 || Val.getZExtValue() >= Num; + return Val.uge(Num); } /// getLimitedValue - If the value is smaller than the specified limit, diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index c1d398f17b59..daf8f8da689d 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -21,8 +21,8 @@ #define LLVM_IR_DATALAYOUT_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 358106aac43b..2174e1f301ee 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -16,15 +16,15 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include #include #include @@ -148,7 +148,7 @@ public: /// Tagged DWARF-like metadata node. /// /// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*, -/// defined in llvm/Support/Dwarf.h). Called \a DINode because it's +/// defined in llvm/BinaryFormat/Dwarf.h). Called \a DINode because it's /// potentially used for non-DWARF output. class DINode : public MDNode { friend class LLVMContextImpl; @@ -2642,7 +2642,8 @@ public: /// Macro Info DWARF-like metadata node. /// /// A metadata node with a DWARF macro info (i.e., a constant named -/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode +/// \c DW_MACINFO_*, defined in llvm/BinaryFormat/Dwarf.h). Called \a +/// DIMacroNode /// because it's potentially used for non-DWARF output. class DIMacroNode : public MDNode { friend class LLVMContextImpl; diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index 5497652135bd..15d332577113 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_DIAGNOSTICINFO_H #define LLVM_IR_DIAGNOSTICINFO_H +#include "llvm-c/Types.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -22,10 +23,9 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm-c/Types.h" -#include #include #include +#include #include #include diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h index def91e73eb1d..9be6acc33591 100644 --- a/include/llvm/IR/Dominators.h +++ b/include/llvm/IR/Dominators.h @@ -66,6 +66,7 @@ public: return End; } + /// Check if this is the only edge between Start and End. bool isSingleEdge() const; }; @@ -143,6 +144,11 @@ public: bool dominates(const Instruction *Def, const Use &U) const; bool dominates(const Instruction *Def, const Instruction *User) const; bool dominates(const Instruction *Def, const BasicBlock *BB) const; + + /// Return true if an edge dominates a use. + /// + /// If BBE is not a unique edge between start and end of the edge, it can + /// never dominate the use. bool dominates(const BasicBlockEdge &BBE, const Use &U) const; bool dominates(const BasicBlockEdge &BBE, const BasicBlock *BB) const; diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 29f512ddd076..3496806d9362 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -19,10 +19,10 @@ #define LLVM_IR_FUNCTION_H #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" diff --git a/include/llvm/IR/GetElementPtrTypeIterator.h b/include/llvm/IR/GetElementPtrTypeIterator.h index f017a449d33f..3c143ea5f703 100644 --- a/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/include/llvm/IR/GetElementPtrTypeIterator.h @@ -21,9 +21,9 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" -#include +#include #include -#include +#include #include namespace llvm { diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 20495725f9d0..d65d43cc5957 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -23,9 +23,9 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Value.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" #include #include #include diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index 3f5d00bd3b3a..454492769c8b 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -23,8 +23,8 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist_node.h" -#include "llvm/IR/GlobalObject.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Value.h" #include @@ -235,6 +235,13 @@ public: Attrs = A; } + /// Check if section name is present + bool hasImplicitSection() const { + return getAttributes().hasAttribute("bss-section") || + getAttributes().hasAttribute("data-section") || + getAttributes().hasAttribute("rodata-section"); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Value *V) { return V->getValueID() == Value::GlobalVariableVal; diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 9d4c13c29f68..5ddaf2b1733b 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_IRBUILDER_H #define LLVM_IR_IRBUILDER_H +#include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" @@ -41,11 +42,10 @@ #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" -#include "llvm-c/Types.h" +#include #include #include #include -#include #include namespace llvm { @@ -435,6 +435,28 @@ public: MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + /// \brief Create and insert an atomic memcpy between the specified + /// pointers. + /// + /// If the pointers aren't i8*, they will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope + /// and noalias tags. + CallInst *CreateElementAtomicMemCpy( + Value *Dst, Value *Src, uint64_t NumElements, uint32_t ElementSize, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { + return CreateElementAtomicMemCpy(Dst, Src, getInt64(NumElements), + ElementSize, TBAATag, TBAAStructTag, + ScopeTag, NoAliasTag); + } + + CallInst *CreateElementAtomicMemCpy(Value *Dst, Value *Src, + Value *NumElements, uint32_t ElementSize, + MDNode *TBAATag = nullptr, + MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); + /// \brief Create and insert a memmove between the specified /// pointers. /// diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index e850c015d711..ff63da50afee 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -17,13 +17,13 @@ #define LLVM_IR_INSTRTYPES_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 6e109735ddd3..00c431834e31 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -16,9 +16,9 @@ #define LLVM_IR_INSTRUCTION_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/User.h" @@ -360,9 +360,9 @@ public: /// Copy I's fast-math flags void copyFastMathFlags(const Instruction *I); - /// Convenience method to copy supported wrapping, exact, and fast-math flags - /// from V to this instruction. - void copyIRFlags(const Value *V); + /// Convenience method to copy supported exact, fast-math, and (optionally) + /// wrapping flags from V to this instruction. + void copyIRFlags(const Value *V, bool IncludeWrapFlags = true); /// Logical 'and' of any supported wrapping, exact, and fast-math flags of /// V and this instruction. diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 1f7990b99ebe..6029b0a7c571 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -17,13 +17,13 @@ #define LLVM_IR_INSTRUCTIONS_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" @@ -2234,6 +2234,8 @@ public: return User::operator new(s, 3); } + void *operator new(size_t, unsigned) = delete; + /// Return true if a shufflevector instruction can be /// formed with the specified operands. static bool isValidOperands(const Value *V1, const Value *V2, @@ -2331,9 +2333,6 @@ class ExtractValueInst : public UnaryInstruction { ArrayRef Idxs, const Twine &NameStr, BasicBlock *InsertAtEnd); - // allocate space for exactly one operand - void *operator new(size_t s) { return User::operator new(s, 1); } - void init(ArrayRef Idxs, const Twine &NameStr); protected: @@ -2579,7 +2578,6 @@ class PHINode : public Instruction { unsigned ReservedSpace; PHINode(const PHINode &PN); - // allocate space for exactly zero operands explicit PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr = "", @@ -2598,6 +2596,7 @@ class PHINode : public Instruction { allocHungoffUses(ReservedSpace); } + // allocate space for exactly zero operands void *operator new(size_t s) { return User::operator new(s); } @@ -2970,9 +2969,13 @@ public: private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("ReturnInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *B) { + llvm_unreachable("ReturnInst has no successors!"); + } }; template <> @@ -3078,13 +3081,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); }; template <> @@ -3444,13 +3440,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); }; template <> @@ -3551,13 +3540,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); }; template <> @@ -4036,12 +4018,6 @@ public: } private: - friend TerminatorInst; - - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); - template bool hasFnAttrImpl(AttrKind Kind) const { if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind)) return true; @@ -4139,9 +4115,13 @@ public: private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("ResumeInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + llvm_unreachable("ResumeInst has no successors!"); + } }; template <> @@ -4321,13 +4301,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - friend TerminatorInst; - - BasicBlock *getSuccessorV(unsigned Idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned Idx, BasicBlock *B); }; template <> @@ -4492,9 +4465,15 @@ public: private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned Idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned Idx, BasicBlock *B); + BasicBlock *getSuccessor(unsigned Idx) const { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + return getSuccessor(); + } + + void setSuccessor(unsigned Idx, BasicBlock *B) { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + setSuccessor(B); + } }; template <> @@ -4582,9 +4561,15 @@ public: private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned Idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned Idx, BasicBlock *B); + BasicBlock *getSuccessor(unsigned Idx) const { + assert(Idx == 0); + return getUnwindDest(); + } + + void setSuccessor(unsigned Idx, BasicBlock *B) { + assert(Idx == 0); + setUnwindDest(B); + } // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -4639,9 +4624,13 @@ public: private: friend TerminatorInst; - BasicBlock *getSuccessorV(unsigned idx) const; - unsigned getNumSuccessorsV() const; - void setSuccessorV(unsigned idx, BasicBlock *B); + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("UnreachableInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *B) { + llvm_unreachable("UnreachableInst has no successors!"); + } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index e1928546607a..8017223c4ab0 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -703,6 +703,16 @@ def int_amdgcn_readlane : GCCBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; +def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + +def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 3c753260190e..d538c2595393 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -19,18 +19,18 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constant.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 5e1f680c5b36..d47d82a57bff 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -15,10 +15,11 @@ #ifndef LLVM_IR_MODULE_H #define LLVM_IR_MODULE_H -#include "llvm/ADT/iterator_range.h" +#include "llvm-c/Types.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" @@ -30,7 +31,6 @@ #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/CodeGen.h" -#include "llvm-c/Types.h" #include #include #include diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index 757ddf6cf46b..144e45f18d2c 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -18,13 +18,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Module.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" #include #include #include diff --git a/include/llvm/IR/OperandTraits.h b/include/llvm/IR/OperandTraits.h index 7b94283856b6..c618aff3df9a 100644 --- a/include/llvm/IR/OperandTraits.h +++ b/include/llvm/IR/OperandTraits.h @@ -88,9 +88,6 @@ struct VariadicOperandTraits { /// HungoffOperandTraits - determine the allocation regime of the Use array /// when it is not a prefix to the User object, but allocated at an unrelated /// heap address. -/// Assumes that the User subclass that is determined by this traits class -/// has an OperandList member of type User::op_iterator. [Note: this is now -/// trivially satisfied, because User has that member for historic reasons.] /// /// This is the traits class that is needed when the Use array must be /// resizable. diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 072c6c5ece83..542570aaaa24 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -262,7 +262,7 @@ template struct api_pred_ty : public Predicate { }; struct is_one { - bool isValue(const APInt &C) { return C == 1; } + bool isValue(const APInt &C) { return C.isOneValue(); } }; /// \brief Match an integer 1 or a vector with all elements equal to 1. diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index a5f0130f79f4..265e7eb348bf 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -17,8 +17,8 @@ #ifndef LLVM_IR_STATEPOINT_H #define LLVM_IR_STATEPOINT_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index 82362107e41e..b37b59288e3f 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -18,8 +18,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h index d3a59d8a060e..0ac13935c7ce 100644 --- a/include/llvm/IR/Use.h +++ b/include/llvm/IR/Use.h @@ -25,10 +25,10 @@ #ifndef LLVM_IR_USE_H #define LLVM_IR_USE_H +#include "llvm-c/Types.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" -#include "llvm-c/Types.h" namespace llvm { diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index d669b1544070..ccd40e576584 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -14,11 +14,11 @@ #ifndef LLVM_IR_VALUE_H #define LLVM_IR_VALUE_H +#include "llvm-c/Types.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" -#include "llvm-c/Types.h" #include #include #include diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h index 3772592757be..774e144b3ef0 100644 --- a/include/llvm/LTO/LTO.h +++ b/include/llvm/LTO/LTO.h @@ -366,8 +366,9 @@ private: /// each global symbol based on its internal resolution of that symbol. struct SymbolResolution { SymbolResolution() - : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0) { - } + : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0), + LinkerRedefined(0) {} + /// The linker has chosen this definition of the symbol. unsigned Prevailing : 1; @@ -377,6 +378,10 @@ struct SymbolResolution { /// The definition of this symbol is visible outside of the LTO unit. unsigned VisibleToRegularObj : 1; + + /// Linker redefined version of the symbol which appeared in -wrap or -defsym + /// linker option. + unsigned LinkerRedefined : 1; }; } // namespace lto diff --git a/include/llvm/LinkAllIR.h b/include/llvm/LinkAllIR.h index f078c73f979e..de1d305f8e77 100644 --- a/include/llvm/LinkAllIR.h +++ b/include/llvm/LinkAllIR.h @@ -16,13 +16,13 @@ #ifndef LLVM_LINKALLIR_H #define LLVM_LINKALLIR_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Memory.h" diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 5c398b2ab567..c309ddbe2f02 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -16,8 +16,8 @@ #define LLVM_LINKALLPASSES_H #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysisEvaluator.h" +#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" @@ -38,6 +38,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/Support/Valgrind.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -48,7 +49,6 @@ #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Support/Valgrind.h" #include namespace { diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 869706c45483..234762f36dd4 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -51,12 +51,6 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment }; } // end namespace LCOMM -enum class DebugCompressionType { - DCT_None, // no compression - DCT_Zlib, // zlib style complession - DCT_ZlibGnu // zlib-gnu style compression -}; - /// This class is intended to be used as a base class for asm /// properties and features specific to the target. class MCAsmInfo { @@ -366,7 +360,7 @@ protected: bool PreserveAsmComments; /// Compress DWARF debug sections. Defaults to no compression. - DebugCompressionType CompressDebugSections = DebugCompressionType::DCT_None; + DebugCompressionType CompressDebugSections = DebugCompressionType::None; /// True if the integrated assembler should interpret 'a >> b' constant /// expressions as logical rather than arithmetic. diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 185b892d9621..63f7057a7076 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -11,11 +11,11 @@ #define LLVM_MC_MCASSEMBLER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCFixup.h" diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h index 41521a6549b8..c3f1cecc97f4 100644 --- a/include/llvm/MC/MCCodeView.h +++ b/include/llvm/MC/MCCodeView.h @@ -14,10 +14,10 @@ #ifndef LLVM_MC_MCCODEVIEW_H #define LLVM_MC_MCCODEVIEW_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" -#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCFragment.h" +#include "llvm/MC/MCObjectStreamer.h" #include #include diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 9bea19631303..2c60014adf23 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -17,12 +17,12 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index f22fc11f9b07..2efd37924e2e 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -11,8 +11,8 @@ #define LLVM_MC_MCELFOBJECTWRITER_H #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index 0ca530c45102..0aca922e3cf5 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -11,10 +11,10 @@ #define LLVM_MC_MCFRAGMENT_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/SMLoc.h" diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index b93638f86408..2d2480a27223 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -12,11 +12,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/StringTableBuilder.h" -#include "llvm/Support/MachO.h" #include #include #include diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 8b2a1261b220..4d634447987b 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -109,6 +109,9 @@ protected: MCSection *DwarfLineDWOSection; MCSection *DwarfLocDWOSection; MCSection *DwarfStrOffDWOSection; + + /// The DWARF v5 string offset and address table sections. + MCSection *DwarfStrOffSection; MCSection *DwarfAddrSection; // These are for Fission DWP files. @@ -260,6 +263,7 @@ public: MCSection *getDwarfLineDWOSection() const { return DwarfLineDWOSection; } MCSection *getDwarfLocDWOSection() const { return DwarfLocDWOSection; } MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; } + MCSection *getDwarfStrOffSection() const { return DwarfStrOffSection; } MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 75d45f490bde..3a659f048ccf 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -11,9 +11,9 @@ #define LLVM_MC_MCPARSER_MCASMPARSER_H #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 7bfffbcdb7c2..cc306d47250d 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -14,8 +14,8 @@ #ifndef LLVM_MC_MCSECTION_H #define LLVM_MC_MCSECTION_H -#include "llvm/ADT/ilist.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" #include diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h index 3bc5408a4f75..89db09cbdbdc 100644 --- a/include/llvm/MC/MCSectionMachO.h +++ b/include/llvm/MC/MCSectionMachO.h @@ -15,8 +15,8 @@ #define LLVM_MC_MCSECTIONMACHO_H #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCSection.h" -#include "llvm/Support/MachO.h" namespace llvm { diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h index 4445be006eb0..7d661ccc5de7 100644 --- a/include/llvm/MC/MCSymbolWasm.h +++ b/include/llvm/MC/MCSymbolWasm.h @@ -9,8 +9,8 @@ #ifndef LLVM_MC_MCSYMBOLWASM_H #define LLVM_MC_MCSYMBOLWASM_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Wasm.h" namespace llvm { class MCSymbolWasm : public MCSymbol { diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index ab027ab27a41..5509bb3bdc7c 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -23,6 +23,12 @@ enum class ExceptionHandling { WinEH, /// Windows Exception Handling }; +enum class DebugCompressionType { + None, /// No compression + GNU, /// zlib-gnu style compression + Z, /// zlib style complession +}; + class StringRef; class MCTargetOptions { diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h index a4dd382706d7..c250d3bf03fb 100644 --- a/include/llvm/MC/MCWasmObjectWriter.h +++ b/include/llvm/MC/MCWasmObjectWriter.h @@ -11,6 +11,7 @@ #define LLVM_MC_MCWASMOBJECTWRITER_H #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/raw_ostream.h" @@ -28,27 +29,6 @@ class MCSymbolWasm; class MCValue; class raw_pwrite_stream; -// Information about a single relocation. -struct WasmRelocationEntry { - uint64_t Offset; // Where is the relocation. - const MCSymbolWasm *Symbol; // The symbol to relocate with. - int64_t Addend; // A value to add to the symbol. - unsigned Type; // The type of the relocation. - MCSectionWasm *FixupSection;// The section the relocation is targeting. - - WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, - int64_t Addend, unsigned Type, - MCSectionWasm *FixupSection) - : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), - FixupSection(FixupSection) {} - - void print(raw_ostream &Out) const { - Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend - << ", Type=" << Type << ", FixupSection=" << FixupSection; - } - void dump() const { print(errs()); } -}; - class MCWasmObjectTargetWriter { const unsigned Is64Bit : 1; @@ -56,17 +36,11 @@ protected: explicit MCWasmObjectTargetWriter(bool Is64Bit_); public: - virtual ~MCWasmObjectTargetWriter() {} + virtual ~MCWasmObjectTargetWriter(); virtual unsigned getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const = 0; - virtual bool needsRelocateWithSymbol(const MCSymbol &Sym, - unsigned Type) const; - - virtual void sortRelocs(const MCAssembler &Asm, - std::vector &Relocs); - /// \name Accessors /// @{ bool is64Bit() const { return Is64Bit; } diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index 807508107c56..6c5fb9d5c92b 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -14,9 +14,9 @@ #ifndef LLVM_OBJECT_ARCHIVE_H #define LLVM_OBJECT_ARCHIVE_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/Error.h" diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index dafd1a43cb59..ae695a529597 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -15,13 +15,13 @@ #define LLVM_OBJECT_COFF_H #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CVDebugRecord.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" @@ -646,6 +646,13 @@ struct coff_resource_dir_entry { } Offset; }; +struct coff_resource_data_entry { + support::ulittle32_t DataRVA; + support::ulittle32_t DataSize; + support::ulittle32_t Codepage; + support::ulittle32_t Reserved; +}; + struct coff_resource_dir_table { support::ulittle32_t Characteristics; support::ulittle32_t TimeDateStamp; diff --git a/include/llvm/Object/COFFModuleDefinition.h b/include/llvm/Object/COFFModuleDefinition.h index 0428283fdc88..a0e8eacdb7a3 100644 --- a/include/llvm/Object/COFFModuleDefinition.h +++ b/include/llvm/Object/COFFModuleDefinition.h @@ -20,8 +20,8 @@ #ifndef LLVM_OBJECT_COFF_MODULE_DEFINITION_H #define LLVM_OBJECT_COFF_MODULE_DEFINITION_H -#include "llvm/Object/COFFImportFile.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" namespace llvm { namespace object { diff --git a/include/llvm/Object/Decompressor.h b/include/llvm/Object/Decompressor.h index 0f63f8b821b7..c8e888d285e4 100644 --- a/include/llvm/Object/Decompressor.h +++ b/include/llvm/Object/Decompressor.h @@ -10,8 +10,8 @@ #ifndef LLVM_OBJECT_DECOMPRESSOR_H #define LLVM_OBJECT_DECOMPRESSOR_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Object/ObjectFile.h" namespace llvm { diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index a4d431b6cbe7..670c0bbce3ac 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -17,9 +17,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index ef2abd8c52ce..2ba3b13f49da 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELF.h" @@ -29,7 +30,6 @@ #include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h index 99346fe1a882..808144694acb 100644 --- a/include/llvm/Object/ELFTypes.h +++ b/include/llvm/Object/ELFTypes.h @@ -12,8 +12,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Error.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h index 0ea89011e883..3bce7813ee93 100644 --- a/include/llvm/Object/IRObjectFile.h +++ b/include/llvm/Object/IRObjectFile.h @@ -15,10 +15,12 @@ #define LLVM_OBJECT_IROBJECTFILE_H #include "llvm/ADT/PointerUnion.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" namespace llvm { +class BitcodeModule; class Mangler; class Module; class GlobalValue; @@ -61,7 +63,20 @@ public: static Expected> create(MemoryBufferRef Object, LLVMContext &Context); }; + +/// The contents of a bitcode file and its irsymtab. Any underlying data +/// for the irsymtab are owned by Symtab and Strtab. +struct IRSymtabFile { + std::vector Mods; + SmallVector Symtab, Strtab; + irsymtab::Reader TheReader; +}; + +/// Reads a bitcode file, creating its irsymtab if necessary. +Expected readIRSymtab(MemoryBufferRef MBRef); + } + } #endif diff --git a/include/llvm/Object/IRSymtab.h b/include/llvm/Object/IRSymtab.h index b425543bf637..5b832141a865 100644 --- a/include/llvm/Object/IRSymtab.h +++ b/include/llvm/Object/IRSymtab.h @@ -25,8 +25,8 @@ #define LLVM_OBJECT_IRSYMTAB_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Endian.h" @@ -36,6 +36,9 @@ #include namespace llvm { + +struct BitcodeFileContents; + namespace irsymtab { namespace storage { @@ -314,6 +317,16 @@ inline Reader::symbol_range Reader::module_symbols(unsigned I) const { SymbolRef(MEnd, MEnd, nullptr, this)}; } +/// The contents of the irsymtab in a bitcode file. Any underlying data for the +/// irsymtab are owned by Symtab and Strtab. +struct FileContents { + SmallVector Symtab, Strtab; + Reader TheReader; +}; + +/// Reads the contents of a bitcode file, creating its irsymtab if necessary. +Expected readBitcode(const BitcodeFileContents &BFC); + } // end namespace irsymtab } // end namespace llvm diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index a4356d5977b2..3fc726f4ccb8 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -16,19 +16,19 @@ #define LLVM_OBJECT_MACHO_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -550,6 +550,8 @@ public: bool isRelocatableObject() const override; + StringRef mapDebugSectionName(StringRef Name) const override; + bool hasPageZeroSegment() const { return HasPageZeroSegment; } static bool classof(const Binary *v) { diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h index a14c4ca01223..8a6f0fc56971 100644 --- a/include/llvm/Object/MachOUniversal.h +++ b/include/llvm/Object/MachOUniversal.h @@ -16,10 +16,10 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/MachO.h" -#include "llvm/Support/MachO.h" namespace llvm { class StringRef; diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index ea6a9049bc1b..6b5b9d95fcf3 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -14,8 +14,9 @@ #ifndef LLVM_OBJECT_OBJECTFILE_H #define LLVM_OBJECT_OBJECTFILE_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" @@ -292,6 +293,9 @@ public: return std::error_code(); } + /// Maps a debug section name to a standard DWARF section name. + virtual StringRef mapDebugSectionName(StringRef Name) const { return Name; } + /// True if this is a relocatable object (.o/.obj). virtual bool isRelocatableObject() const = 0; @@ -303,10 +307,10 @@ public: createObjectFile(StringRef ObjectPath); static Expected> - createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type); + createObjectFile(MemoryBufferRef Object, llvm::file_magic Type); static Expected> createObjectFile(MemoryBufferRef Object) { - return createObjectFile(Object, sys::fs::file_magic::unknown); + return createObjectFile(Object, llvm::file_magic::unknown); } static inline bool classof(const Binary *v) { diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h index 348179860f3e..c358d3996435 100644 --- a/include/llvm/Object/RelocVisitor.h +++ b/include/llvm/Object/RelocVisitor.h @@ -17,15 +17,15 @@ #define LLVM_OBJECT_RELOCVISITOR_H #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MachO.h" #include #include diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h index f4be4bfdb1a3..97eeba6611a2 100644 --- a/include/llvm/Object/SymbolicFile.h +++ b/include/llvm/Object/SymbolicFile.h @@ -14,8 +14,9 @@ #ifndef LLVM_OBJECT_SYMBOLICFILE_H #define LLVM_OBJECT_SYMBOLICFILE_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" @@ -162,12 +163,12 @@ public: // construction aux. static Expected> - createSymbolicFile(MemoryBufferRef Object, sys::fs::file_magic Type, + createSymbolicFile(MemoryBufferRef Object, llvm::file_magic Type, LLVMContext *Context); static Expected> createSymbolicFile(MemoryBufferRef Object) { - return createSymbolicFile(Object, sys::fs::file_magic::unknown, nullptr); + return createSymbolicFile(Object, llvm::file_magic::unknown, nullptr); } static Expected> createSymbolicFile(StringRef ObjectPath); diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h index de54a4928cce..10edc461b9e9 100644 --- a/include/llvm/Object/Wasm.h +++ b/include/llvm/Object/Wasm.h @@ -19,11 +19,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Wasm.h" #include #include #include diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h index 2484f551aee0..c5189329d3ec 100644 --- a/include/llvm/Object/WindowsResource.h +++ b/include/llvm/Object/WindowsResource.h @@ -31,11 +31,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -44,10 +44,15 @@ #include namespace llvm { + +class FileOutputBuffer; + namespace object { class WindowsResource; +enum class Machine { UNKNOWN, ARM, X64, X86 }; + class ResourceEntryRef { public: Error moveNext(bool &End); @@ -58,6 +63,10 @@ public: ArrayRef getNameString() const { return Name; } uint16_t getNameID() const { return NameID; } uint16_t getLanguage() const { return Suffix->Language; } + uint16_t getMajorVersion() const { return Suffix->Version >> 16; } + uint16_t getMinorVersion() const { return Suffix->Version; } + uint32_t getCharacteristics() const { return Suffix->Characteristics; } + ArrayRef getData() const { return Data; } private: friend class WindowsResource; @@ -106,34 +115,77 @@ private: class WindowsResourceParser { public: + class TreeNode; WindowsResourceParser(); - Error parse(WindowsResource *WR); - void printTree() const; + const TreeNode &getTree() const { return Root; } + const ArrayRef> getData() const { return Data; } + const ArrayRef> getStringTable() const { + return StringTable; + } -private: class TreeNode { public: - TreeNode() = default; - explicit TreeNode(ArrayRef Ref); - void addEntry(const ResourceEntryRef &Entry); + template + using Children = std::map>; + void print(ScopedPrinter &Writer, StringRef Name) const; + uint32_t getTreeSize() const; + uint32_t getStringIndex() const { return StringIndex; } + uint32_t getDataIndex() const { return DataIndex; } + uint16_t getMajorVersion() const { return MajorVersion; } + uint16_t getMinorVersion() const { return MinorVersion; } + uint32_t getCharacteristics() const { return Characteristics; } + bool checkIsDataNode() const { return IsDataNode; } + const Children &getIDChildren() const { return IDChildren; } + const Children &getStringChildren() const { + return StringChildren; + } private: + friend class WindowsResourceParser; + + static uint32_t StringCount; + static uint32_t DataCount; + + static std::unique_ptr createStringNode(); + static std::unique_ptr createIDNode(); + static std::unique_ptr createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics); + + explicit TreeNode(bool IsStringNode); + TreeNode(uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics); + + void addEntry(const ResourceEntryRef &Entry); TreeNode &addTypeNode(const ResourceEntryRef &Entry); TreeNode &addNameNode(const ResourceEntryRef &Entry); TreeNode &addLanguageNode(const ResourceEntryRef &Entry); - TreeNode &addChild(uint32_t ID); + TreeNode &addChild(uint32_t ID, bool IsDataNode = false, + uint16_t MajorVersion = 0, uint16_t MinorVersion = 0, + uint32_t Characteristics = 0); TreeNode &addChild(ArrayRef NameRef); - std::vector Name; - std::map> IDChildren; - std::map> StringChildren; + bool IsDataNode = false; + uint32_t StringIndex; + uint32_t DataIndex; + Children IDChildren; + Children StringChildren; + uint16_t MajorVersion = 0; + uint16_t MinorVersion = 0; + uint32_t Characteristics = 0; }; +private: TreeNode Root; + std::vector> Data; + std::vector> StringTable; }; +Error writeWindowsResourceCOFF(StringRef OutputFile, Machine MachineType, + const WindowsResourceParser &Parser); + } // namespace object } // namespace llvm diff --git a/include/llvm/ObjectYAML/COFFYAML.h b/include/llvm/ObjectYAML/COFFYAML.h index 65ad1dde67f5..1b5f7b00239a 100644 --- a/include/llvm/ObjectYAML/COFFYAML.h +++ b/include/llvm/ObjectYAML/COFFYAML.h @@ -15,8 +15,8 @@ #define LLVM_OBJECTYAML_COFFYAML_H #include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/COFF.h" namespace llvm { diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h index a6d4d404415f..faa3ed8a6c52 100644 --- a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h +++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h @@ -26,6 +26,8 @@ namespace codeview { class DebugStringTableSubsection; class DebugStringTableSubsectionRef; class DebugChecksumsSubsectionRef; +class DebugStringTableSubsection; +class DebugChecksumsSubsection; } namespace CodeViewYAML { @@ -33,6 +35,23 @@ namespace detail { struct YAMLSubsectionBase; } +struct YAMLFrameData { + uint32_t RvaStart; + uint32_t CodeSize; + uint32_t LocalSize; + uint32_t ParamsSize; + uint32_t MaxStackSize; + StringRef FrameFunc; + uint32_t PrologSize; + uint32_t SavedRegsSize; + uint32_t Flags; +}; + +struct YAMLCrossModuleImport { + StringRef ModuleName; + std::vector ImportIds; +}; + struct SourceLineEntry { uint32_t Offset; uint32_t LineStart; @@ -92,8 +111,17 @@ struct YAMLDebugSubsection { }; Expected>> -convertSubsectionList(ArrayRef Subsections, - codeview::DebugStringTableSubsection &Strings); +toCodeViewSubsectionList(BumpPtrAllocator &Allocator, + ArrayRef Subsections, + codeview::DebugStringTableSubsection &Strings); +Expected>> +toCodeViewSubsectionList( + BumpPtrAllocator &Allocator, ArrayRef Subsections, + std::unique_ptr &TakeStrings, + codeview::DebugStringTableSubsection *StringsRef); + +std::unique_ptr +findStringTable(ArrayRef Sections); } // namespace CodeViewYAML } // namespace llvm diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h index a57ada34a4fa..91b75aabe7a5 100644 --- a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h +++ b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h @@ -18,8 +18,12 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/Allocator.h" namespace llvm { +namespace codeview { +class TypeTableBuilder; +} namespace CodeViewYAML { namespace detail { struct LeafRecordBase; @@ -34,6 +38,7 @@ struct LeafRecord { std::shared_ptr Leaf; codeview::CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const; + codeview::CVType toCodeViewRecord(codeview::TypeTableBuilder &TS) const; static Expected fromCodeViewRecord(codeview::CVType Type); }; } // namespace CodeViewYAML diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h index 3f39cfc7bb3d..75e9112e121a 100644 --- a/include/llvm/ObjectYAML/DWARFYAML.h +++ b/include/llvm/ObjectYAML/DWARFYAML.h @@ -16,8 +16,8 @@ #ifndef LLVM_OBJECTYAML_DWARFYAML_H #define LLVM_OBJECTYAML_DWARFYAML_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/Dwarf.h" namespace llvm { namespace DWARFYAML { @@ -241,7 +241,7 @@ template <> struct MappingTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Tag &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -251,7 +251,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::LineNumberOps &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -261,7 +261,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::LineNumberExtendedOps &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -271,7 +271,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Attribute &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -281,7 +281,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Form &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -291,7 +291,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::UnitType &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h index 81a4ec28c94f..9d62ec27ad31 100644 --- a/include/llvm/ObjectYAML/ELFYAML.h +++ b/include/llvm/ObjectYAML/ELFYAML.h @@ -16,8 +16,8 @@ #ifndef LLVM_OBJECTYAML_ELFYAML_H #define LLVM_OBJECTYAML_ELFYAML_H +#include "llvm/BinaryFormat/ELF.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/ELF.h" namespace llvm { namespace ELFYAML { diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h index ae858c8f4aaf..59aca9a1ddf2 100644 --- a/include/llvm/ObjectYAML/MachOYAML.h +++ b/include/llvm/ObjectYAML/MachOYAML.h @@ -16,9 +16,9 @@ #ifndef LLVM_OBJECTYAML_MACHOYAML_H #define LLVM_OBJECTYAML_MACHOYAML_H -#include "llvm/ObjectYAML/YAML.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/ObjectYAML/DWARFYAML.h" -#include "llvm/Support/MachO.h" +#include "llvm/ObjectYAML/YAML.h" namespace llvm { namespace MachOYAML { @@ -209,7 +209,7 @@ template <> struct MappingTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, MachO::LoadCommandType &value) { -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" io.enumFallback(value); } }; @@ -278,7 +278,7 @@ template <> struct ScalarTraits { static void mapping(IO &IO, MachO::LCStruct &LoadCommand); \ }; -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" // Extra structures used by load commands template <> struct MappingTraits { diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h index 7b70c9537827..447dbd7a603d 100644 --- a/include/llvm/ObjectYAML/WasmYAML.h +++ b/include/llvm/ObjectYAML/WasmYAML.h @@ -16,8 +16,8 @@ #ifndef LLVM_OBJECTYAML_WASMYAML_H #define LLVM_OBJECTYAML_WASMYAML_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/Wasm.h" namespace llvm { namespace WasmYAML { diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index 4ed28d7a852b..6a92dd01e911 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -11,8 +11,8 @@ #define LLVM_OPTION_ARGLIST_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index e9c8ca3072c7..2dd6935cf01c 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -384,7 +384,7 @@ extern bool isFunctionInPrintList(StringRef FunctionName); // Include support files that contain important APIs commonly used by Passes, // but that we want to separate out to make it easier to read the header files. // -#include "llvm/PassSupport.h" #include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" #endif diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h index b9a9f5377698..b2f73fda2bae 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -18,11 +18,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/include/llvm/Support/AMDGPUCodeObjectMetadata.h similarity index 98% rename from lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h rename to include/llvm/Support/AMDGPUCodeObjectMetadata.h index 816e8c744b27..d274c5ee9184 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h +++ b/include/llvm/Support/AMDGPUCodeObjectMetadata.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H +#ifndef LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H +#define LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H #include #include @@ -419,4 +419,4 @@ struct Metadata final { } // end namespace AMDGPU } // end namespace llvm -#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H +#endif // LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h index 77c99ffff919..65ec15f6d9e0 100644 --- a/include/llvm/Support/BinaryStreamArray.h +++ b/include/llvm/Support/BinaryStreamArray.h @@ -42,36 +42,114 @@ namespace llvm { /// having to specify a second template argument to VarStreamArray (documented /// below). template struct VarStreamArrayExtractor { - struct ContextType {}; - // Method intentionally deleted. You must provide an explicit specialization - // with one of the following two methods implemented. - static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item) = delete; - - static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item, - const ContextType &Ctx) = delete; + // with the following method implemented. + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + T &Item) const = delete; }; -template -class VarStreamArrayIterator - : public iterator_facade_base< - VarStreamArrayIterator, - std::forward_iterator_tag, Value> { - typedef VarStreamArrayIterator - IterType; +/// VarStreamArray represents an array of variable length records backed by a +/// stream. This could be a contiguous sequence of bytes in memory, it could +/// be a file on disk, or it could be a PDB stream where bytes are stored as +/// discontiguous blocks in a file. Usually it is desirable to treat arrays +/// as contiguous blocks of memory, but doing so with large PDB files, for +/// example, could mean allocating huge amounts of memory just to allow +/// re-ordering of stream data to be contiguous before iterating over it. By +/// abstracting this out, we need not duplicate this memory, and we can +/// iterate over arrays in arbitrarily formatted streams. Elements are parsed +/// lazily on iteration, so there is no upfront cost associated with building +/// or copying a VarStreamArray, no matter how large it may be. +/// +/// You create a VarStreamArray by specifying a ValueType and an Extractor type. +/// If you do not specify an Extractor type, you are expected to specialize +/// VarStreamArrayExtractor for your ValueType. +/// +/// By default an Extractor is default constructed in the class, but in some +/// cases you might find it useful for an Extractor to maintain state across +/// extractions. In this case you can provide your own Extractor through a +/// secondary constructor. The following examples show various ways of +/// creating a VarStreamArray. +/// +/// // Will use VarStreamArrayExtractor as the extractor. +/// VarStreamArray MyTypeArray; +/// +/// // Will use a default-constructed MyExtractor as the extractor. +/// VarStreamArray MyTypeArray2; +/// +/// // Will use the specific instance of MyExtractor provided. +/// // MyExtractor need not be default-constructible in this case. +/// MyExtractor E(SomeContext); +/// VarStreamArray MyTypeArray3(E); +/// + +template class VarStreamArrayIterator; + +template > +class VarStreamArray { + friend class VarStreamArrayIterator; public: - VarStreamArrayIterator() = default; - VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx, - BinaryStreamRef Stream, bool *HadError = nullptr, - uint32_t Offset = 0) - : IterRef(Stream), Ctx(&Ctx), Array(&Array), AbsOffset(Offset), - HadError(HadError) { + typedef VarStreamArrayIterator Iterator; + + VarStreamArray() = default; + + explicit VarStreamArray(const Extractor &E) : E(E) {} + + explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {} + + VarStreamArray(BinaryStreamRef Stream, const Extractor &E) + : Stream(Stream), E(E) {} + + Iterator begin(bool *HadError = nullptr) const { + return Iterator(*this, E, HadError); + } + + bool valid() const { return Stream.valid(); } + + Iterator end() const { return Iterator(E); } + + bool empty() const { return Stream.getLength() == 0; } + + /// \brief given an offset into the array's underlying stream, return an + /// iterator to the record at that offset. This is considered unsafe + /// since the behavior is undefined if \p Offset does not refer to the + /// beginning of a valid record. + Iterator at(uint32_t Offset) const { + return Iterator(*this, E, Offset, nullptr); + } + + const Extractor &getExtractor() const { return E; } + Extractor &getExtractor() { return E; } + + BinaryStreamRef getUnderlyingStream() const { return Stream; } + void setUnderlyingStream(BinaryStreamRef S) { Stream = S; } + +private: + BinaryStreamRef Stream; + Extractor E; +}; + +template +class VarStreamArrayIterator + : public iterator_facade_base, + std::forward_iterator_tag, ValueType> { + typedef VarStreamArrayIterator IterType; + typedef VarStreamArray ArrayType; + +public: + VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, + bool *HadError) + : VarStreamArrayIterator(Array, E, 0, HadError) {} + + VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, + uint32_t Offset, bool *HadError) + : IterRef(Array.Stream.drop_front(Offset)), Extract(E), + Array(&Array), AbsOffset(Offset), HadError(HadError) { if (IterRef.getLength() == 0) moveToEnd(); else { - auto EC = Ctx.template invoke(IterRef, ThisLen, ThisValue); + auto EC = Extract(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); @@ -79,13 +157,8 @@ public: } } - VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx, - bool *HadError = nullptr) - : VarStreamArrayIterator(Array, Ctx, Array.Stream, HadError) {} - - VarStreamArrayIterator(const WrappedCtx &Ctx) : Ctx(&Ctx) {} - VarStreamArrayIterator(const VarStreamArrayIterator &Other) = default; - + VarStreamArrayIterator() = default; + explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} ~VarStreamArrayIterator() = default; bool operator==(const IterType &R) const { @@ -103,12 +176,12 @@ public: return false; } - const Value &operator*() const { + const ValueType &operator*() const { assert(Array && !HasError); return ThisValue; } - Value &operator*() { + ValueType &operator*() { assert(Array && !HasError); return ThisValue; } @@ -125,7 +198,7 @@ public: moveToEnd(); } else { // There is some data after the current record. - auto EC = Ctx->template invoke(IterRef, ThisLen, ThisValue); + auto EC = Extract(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); @@ -153,9 +226,9 @@ private: *HadError = true; } - Value ThisValue; + ValueType ThisValue; BinaryStreamRef IterRef; - const WrappedCtx *Ctx{nullptr}; + Extractor Extract; const ArrayType *Array{nullptr}; uint32_t ThisLen{0}; uint32_t AbsOffset{0}; @@ -163,127 +236,6 @@ private: bool *HadError{nullptr}; }; -template struct ContextWrapper { - ContextWrapper() = default; - - explicit ContextWrapper(Context &&Ctx) : Ctx(Ctx) {} - - template - Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const { - return Extractor::extract(Stream, Len, Item, Ctx); - } - - Context Ctx; -}; - -template struct ContextWrapper { - ContextWrapper() = default; - - template - Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const { - return Extractor::extract(Stream, Len, Item); - } -}; - -/// VarStreamArray represents an array of variable length records backed by a -/// stream. This could be a contiguous sequence of bytes in memory, it could -/// be a file on disk, or it could be a PDB stream where bytes are stored as -/// discontiguous blocks in a file. Usually it is desirable to treat arrays -/// as contiguous blocks of memory, but doing so with large PDB files, for -/// example, could mean allocating huge amounts of memory just to allow -/// re-ordering of stream data to be contiguous before iterating over it. By -/// abstracting this out, we need not duplicate this memory, and we can -/// iterate over arrays in arbitrarily formatted streams. Elements are parsed -/// lazily on iteration, so there is no upfront cost associated with building -/// or copying a VarStreamArray, no matter how large it may be. -/// -/// You create a VarStreamArray by specifying a ValueType and an Extractor type. -/// If you do not specify an Extractor type, you are expected to specialize -/// VarStreamArrayExtractor for your ValueType. -/// -/// The default extractor type is stateless, but by specializing -/// VarStreamArrayExtractor or defining your own custom extractor type and -/// adding the appropriate ContextType typedef to the class, you can pass a -/// context field during construction of the VarStreamArray that will be -/// passed to each call to extract. -/// -template -class VarStreamArrayBase { - typedef VarStreamArrayBase MyType; - -public: - typedef VarStreamArrayIterator Iterator; - friend Iterator; - - VarStreamArrayBase() = default; - - VarStreamArrayBase(BinaryStreamRef Stream, const WrappedCtx &Ctx) - : Stream(Stream), Ctx(Ctx) {} - - VarStreamArrayBase(const MyType &Other) - : Stream(Other.Stream), Ctx(Other.Ctx) {} - - Iterator begin(bool *HadError = nullptr) const { - if (empty()) - return end(); - - return Iterator(*this, Ctx, Stream, HadError); - } - - bool valid() const { return Stream.valid(); } - - Iterator end() const { return Iterator(Ctx); } - - bool empty() const { return Stream.getLength() == 0; } - - /// \brief given an offset into the array's underlying stream, return an - /// iterator to the record at that offset. This is considered unsafe - /// since the behavior is undefined if \p Offset does not refer to the - /// beginning of a valid record. - Iterator at(uint32_t Offset) const { - return Iterator(*this, Ctx, Stream.drop_front(Offset), nullptr, Offset); - } - - BinaryStreamRef getUnderlyingStream() const { return Stream; } - -private: - BinaryStreamRef Stream; - WrappedCtx Ctx; -}; - -template -class VarStreamArrayImpl - : public VarStreamArrayBase> { - typedef ContextWrapper WrappedContext; - typedef VarStreamArrayImpl MyType; - typedef VarStreamArrayBase BaseType; - -public: - typedef Context ContextType; - - VarStreamArrayImpl() = default; - VarStreamArrayImpl(BinaryStreamRef Stream, Context &&Ctx) - : BaseType(Stream, WrappedContext(std::forward(Ctx))) {} -}; - -template -class VarStreamArrayImpl - : public VarStreamArrayBase> { - typedef ContextWrapper WrappedContext; - typedef VarStreamArrayImpl MyType; - typedef VarStreamArrayBase BaseType; - -public: - VarStreamArrayImpl() = default; - VarStreamArrayImpl(BinaryStreamRef Stream) - : BaseType(Stream, WrappedContext()) {} -}; - -template > -using VarStreamArray = - VarStreamArrayImpl; - template class FixedStreamArrayIterator; /// FixedStreamArray is similar to VarStreamArray, except with each record diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h index 29e8a2ab08aa..738c042add3e 100644 --- a/include/llvm/Support/BinaryStreamReader.h +++ b/include/llvm/Support/BinaryStreamReader.h @@ -198,25 +198,7 @@ public: BinaryStreamRef S; if (auto EC = readStreamRef(S, Size)) return EC; - Array = VarStreamArray(S); - return Error::success(); - } - - /// Read a VarStreamArray of size \p Size bytes and store the result into - /// \p Array. Updates the stream's offset to point after the newly read - /// array. Never causes a copy (although iterating the elements of the - /// VarStreamArray may, depending upon the implementation of the underlying - /// stream). - /// - /// \returns a success error code if the data was successfully read, otherwise - /// returns an appropriate error code. - template - Error readArray(VarStreamArray &Array, uint32_t Size, - ContextType &&Context) { - BinaryStreamRef S; - if (auto EC = readStreamRef(S, Size)) - return EC; - Array = VarStreamArray(S, std::move(Context)); + Array.setUnderlyingStream(S); return Error::success(); } diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h index d4633aa7d3c6..f60f99d376ad 100644 --- a/include/llvm/Support/CBindingWrapping.h +++ b/include/llvm/Support/CBindingWrapping.h @@ -14,8 +14,8 @@ #ifndef LLVM_SUPPORT_CBINDINGWRAPPING_H #define LLVM_SUPPORT_CBINDINGWRAPPING_H -#include "llvm/Support/Casting.h" #include "llvm-c/Types.h" +#include "llvm/Support/Casting.h" #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \ inline ty *unwrap(ref P) { \ diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h deleted file mode 100644 index bc2098e2b5cf..000000000000 --- a/include/llvm/Support/COFF.h +++ /dev/null @@ -1,724 +0,0 @@ -//===-- llvm/Support/COFF.h -------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains an definitions used in Windows COFF Files. -// -// Structures and enums defined within this file where created using -// information from Microsoft's publicly available PE/COFF format document: -// -// Microsoft Portable Executable and Common Object File Format Specification -// Revision 8.1 - February 15, 2008 -// -// As of 5/2/2010, hosted by Microsoft at: -// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_COFF_H -#define LLVM_SUPPORT_COFF_H - -#include "llvm/Support/DataTypes.h" -#include -#include - -namespace llvm { -namespace COFF { - - // The maximum number of sections that a COFF object can have (inclusive). - const int32_t MaxNumberOfSections16 = 65279; - - // The PE signature bytes that follows the DOS stub header. - static const char PEMagic[] = { 'P', 'E', '\0', '\0' }; - - static const char BigObjMagic[] = { - '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b', - '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8', - }; - - static const char ClGlObjMagic[] = { - '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d', - '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2', - }; - - // Sizes in bytes of various things in the COFF format. - enum { - Header16Size = 20, - Header32Size = 56, - NameSize = 8, - Symbol16Size = 18, - Symbol32Size = 20, - SectionSize = 40, - RelocationSize = 10 - }; - - struct header { - uint16_t Machine; - int32_t NumberOfSections; - uint32_t TimeDateStamp; - uint32_t PointerToSymbolTable; - uint32_t NumberOfSymbols; - uint16_t SizeOfOptionalHeader; - uint16_t Characteristics; - }; - - struct BigObjHeader { - enum : uint16_t { MinBigObjectVersion = 2 }; - - uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). - uint16_t Sig2; ///< Must be 0xFFFF. - uint16_t Version; - uint16_t Machine; - uint32_t TimeDateStamp; - uint8_t UUID[16]; - uint32_t unused1; - uint32_t unused2; - uint32_t unused3; - uint32_t unused4; - uint32_t NumberOfSections; - uint32_t PointerToSymbolTable; - uint32_t NumberOfSymbols; - }; - - enum MachineTypes { - MT_Invalid = 0xffff, - - IMAGE_FILE_MACHINE_UNKNOWN = 0x0, - IMAGE_FILE_MACHINE_AM33 = 0x13, - IMAGE_FILE_MACHINE_AMD64 = 0x8664, - IMAGE_FILE_MACHINE_ARM = 0x1C0, - IMAGE_FILE_MACHINE_ARMNT = 0x1C4, - IMAGE_FILE_MACHINE_ARM64 = 0xAA64, - IMAGE_FILE_MACHINE_EBC = 0xEBC, - IMAGE_FILE_MACHINE_I386 = 0x14C, - IMAGE_FILE_MACHINE_IA64 = 0x200, - IMAGE_FILE_MACHINE_M32R = 0x9041, - IMAGE_FILE_MACHINE_MIPS16 = 0x266, - IMAGE_FILE_MACHINE_MIPSFPU = 0x366, - IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466, - IMAGE_FILE_MACHINE_POWERPC = 0x1F0, - IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1, - IMAGE_FILE_MACHINE_R4000 = 0x166, - IMAGE_FILE_MACHINE_SH3 = 0x1A2, - IMAGE_FILE_MACHINE_SH3DSP = 0x1A3, - IMAGE_FILE_MACHINE_SH4 = 0x1A6, - IMAGE_FILE_MACHINE_SH5 = 0x1A8, - IMAGE_FILE_MACHINE_THUMB = 0x1C2, - IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 - }; - - enum Characteristics { - C_Invalid = 0, - - /// The file does not contain base relocations and must be loaded at its - /// preferred base. If this cannot be done, the loader will error. - IMAGE_FILE_RELOCS_STRIPPED = 0x0001, - /// The file is valid and can be run. - IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002, - /// COFF line numbers have been stripped. This is deprecated and should be - /// 0. - IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004, - /// COFF symbol table entries for local symbols have been removed. This is - /// deprecated and should be 0. - IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008, - /// Aggressively trim working set. This is deprecated and must be 0. - IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010, - /// Image can handle > 2GiB addresses. - IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020, - /// Little endian: the LSB precedes the MSB in memory. This is deprecated - /// and should be 0. - IMAGE_FILE_BYTES_REVERSED_LO = 0x0080, - /// Machine is based on a 32bit word architecture. - IMAGE_FILE_32BIT_MACHINE = 0x0100, - /// Debugging info has been removed. - IMAGE_FILE_DEBUG_STRIPPED = 0x0200, - /// If the image is on removable media, fully load it and copy it to swap. - IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400, - /// If the image is on network media, fully load it and copy it to swap. - IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800, - /// The image file is a system file, not a user program. - IMAGE_FILE_SYSTEM = 0x1000, - /// The image file is a DLL. - IMAGE_FILE_DLL = 0x2000, - /// This file should only be run on a uniprocessor machine. - IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000, - /// Big endian: the MSB precedes the LSB in memory. This is deprecated - /// and should be 0. - IMAGE_FILE_BYTES_REVERSED_HI = 0x8000 - }; - - enum ResourceTypeID { - RID_Cursor = 1, - RID_Bitmap = 2, - RID_Icon = 3, - RID_Menu = 4, - RID_Dialog = 5, - RID_String = 6, - RID_FontDir = 7, - RID_Font = 8, - RID_Accelerator = 9, - RID_RCData = 10, - RID_MessageTable = 11, - RID_Group_Cursor = 12, - RID_Group_Icon = 14, - RID_Version = 16, - RID_DLGInclude = 17, - RID_PlugPlay = 19, - RID_VXD = 20, - RID_AniCursor = 21, - RID_AniIcon = 22, - RID_HTML = 23, - RID_Manifest = 24, - }; - - struct symbol { - char Name[NameSize]; - uint32_t Value; - int32_t SectionNumber; - uint16_t Type; - uint8_t StorageClass; - uint8_t NumberOfAuxSymbols; - }; - - enum SymbolSectionNumber : int32_t { - IMAGE_SYM_DEBUG = -2, - IMAGE_SYM_ABSOLUTE = -1, - IMAGE_SYM_UNDEFINED = 0 - }; - - /// Storage class tells where and what the symbol represents - enum SymbolStorageClass { - SSC_Invalid = 0xff, - - IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function - IMAGE_SYM_CLASS_NULL = 0, ///< No symbol - IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable - IMAGE_SYM_CLASS_EXTERNAL = 2, ///< External symbol - IMAGE_SYM_CLASS_STATIC = 3, ///< Static - IMAGE_SYM_CLASS_REGISTER = 4, ///< Register variable - IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, ///< External definition - IMAGE_SYM_CLASS_LABEL = 6, ///< Label - IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, ///< Undefined label - IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, ///< Member of structure - IMAGE_SYM_CLASS_ARGUMENT = 9, ///< Function argument - IMAGE_SYM_CLASS_STRUCT_TAG = 10, ///< Structure tag - IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, ///< Member of union - IMAGE_SYM_CLASS_UNION_TAG = 12, ///< Union tag - IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, ///< Type definition - IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static - IMAGE_SYM_CLASS_ENUM_TAG = 15, ///< Enumeration tag - IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, ///< Member of enumeration - IMAGE_SYM_CLASS_REGISTER_PARAM = 17, ///< Register parameter - IMAGE_SYM_CLASS_BIT_FIELD = 18, ///< Bit field - /// ".bb" or ".eb" - beginning or end of block - IMAGE_SYM_CLASS_BLOCK = 100, - /// ".bf" or ".ef" - beginning or end of function - IMAGE_SYM_CLASS_FUNCTION = 101, - IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure - IMAGE_SYM_CLASS_FILE = 103, ///< File name - /// Line number, reformatted as symbol - IMAGE_SYM_CLASS_SECTION = 104, - IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Duplicate tag - /// External symbol in dmert public lib - IMAGE_SYM_CLASS_CLR_TOKEN = 107 - }; - - enum SymbolBaseType { - IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type. - IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions. - IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte). - IMAGE_SYM_TYPE_SHORT = 3, ///< A 2-byte signed integer. - IMAGE_SYM_TYPE_INT = 4, ///< A natural integer type on the target. - IMAGE_SYM_TYPE_LONG = 5, ///< A 4-byte signed integer. - IMAGE_SYM_TYPE_FLOAT = 6, ///< A 4-byte floating-point number. - IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number. - IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure. - IMAGE_SYM_TYPE_UNION = 9, ///< An union. - IMAGE_SYM_TYPE_ENUM = 10, ///< An enumerated type. - IMAGE_SYM_TYPE_MOE = 11, ///< A member of enumeration (a specific value). - IMAGE_SYM_TYPE_BYTE = 12, ///< A byte; unsigned 1-byte integer. - IMAGE_SYM_TYPE_WORD = 13, ///< A word; unsigned 2-byte integer. - IMAGE_SYM_TYPE_UINT = 14, ///< An unsigned integer of natural size. - IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer. - }; - - enum SymbolComplexType { - IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable. - IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type. - IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type. - IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type. - - /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT)) - SCT_COMPLEX_TYPE_SHIFT = 4 - }; - - enum AuxSymbolType { - IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1 - }; - - struct section { - char Name[NameSize]; - uint32_t VirtualSize; - uint32_t VirtualAddress; - uint32_t SizeOfRawData; - uint32_t PointerToRawData; - uint32_t PointerToRelocations; - uint32_t PointerToLineNumbers; - uint16_t NumberOfRelocations; - uint16_t NumberOfLineNumbers; - uint32_t Characteristics; - }; - - enum SectionCharacteristics : uint32_t { - SC_Invalid = 0xffffffff, - - IMAGE_SCN_TYPE_NOLOAD = 0x00000002, - IMAGE_SCN_TYPE_NO_PAD = 0x00000008, - IMAGE_SCN_CNT_CODE = 0x00000020, - IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, - IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, - IMAGE_SCN_LNK_OTHER = 0x00000100, - IMAGE_SCN_LNK_INFO = 0x00000200, - IMAGE_SCN_LNK_REMOVE = 0x00000800, - IMAGE_SCN_LNK_COMDAT = 0x00001000, - IMAGE_SCN_GPREL = 0x00008000, - IMAGE_SCN_MEM_PURGEABLE = 0x00020000, - IMAGE_SCN_MEM_16BIT = 0x00020000, - IMAGE_SCN_MEM_LOCKED = 0x00040000, - IMAGE_SCN_MEM_PRELOAD = 0x00080000, - IMAGE_SCN_ALIGN_1BYTES = 0x00100000, - IMAGE_SCN_ALIGN_2BYTES = 0x00200000, - IMAGE_SCN_ALIGN_4BYTES = 0x00300000, - IMAGE_SCN_ALIGN_8BYTES = 0x00400000, - IMAGE_SCN_ALIGN_16BYTES = 0x00500000, - IMAGE_SCN_ALIGN_32BYTES = 0x00600000, - IMAGE_SCN_ALIGN_64BYTES = 0x00700000, - IMAGE_SCN_ALIGN_128BYTES = 0x00800000, - IMAGE_SCN_ALIGN_256BYTES = 0x00900000, - IMAGE_SCN_ALIGN_512BYTES = 0x00A00000, - IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000, - IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, - IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, - IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, - IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, - IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, - IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, - IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, - IMAGE_SCN_MEM_SHARED = 0x10000000, - IMAGE_SCN_MEM_EXECUTE = 0x20000000, - IMAGE_SCN_MEM_READ = 0x40000000, - IMAGE_SCN_MEM_WRITE = 0x80000000 - }; - - struct relocation { - uint32_t VirtualAddress; - uint32_t SymbolTableIndex; - uint16_t Type; - }; - - enum RelocationTypeI386 { - IMAGE_REL_I386_ABSOLUTE = 0x0000, - IMAGE_REL_I386_DIR16 = 0x0001, - IMAGE_REL_I386_REL16 = 0x0002, - IMAGE_REL_I386_DIR32 = 0x0006, - IMAGE_REL_I386_DIR32NB = 0x0007, - IMAGE_REL_I386_SEG12 = 0x0009, - IMAGE_REL_I386_SECTION = 0x000A, - IMAGE_REL_I386_SECREL = 0x000B, - IMAGE_REL_I386_TOKEN = 0x000C, - IMAGE_REL_I386_SECREL7 = 0x000D, - IMAGE_REL_I386_REL32 = 0x0014 - }; - - enum RelocationTypeAMD64 { - IMAGE_REL_AMD64_ABSOLUTE = 0x0000, - IMAGE_REL_AMD64_ADDR64 = 0x0001, - IMAGE_REL_AMD64_ADDR32 = 0x0002, - IMAGE_REL_AMD64_ADDR32NB = 0x0003, - IMAGE_REL_AMD64_REL32 = 0x0004, - IMAGE_REL_AMD64_REL32_1 = 0x0005, - IMAGE_REL_AMD64_REL32_2 = 0x0006, - IMAGE_REL_AMD64_REL32_3 = 0x0007, - IMAGE_REL_AMD64_REL32_4 = 0x0008, - IMAGE_REL_AMD64_REL32_5 = 0x0009, - IMAGE_REL_AMD64_SECTION = 0x000A, - IMAGE_REL_AMD64_SECREL = 0x000B, - IMAGE_REL_AMD64_SECREL7 = 0x000C, - IMAGE_REL_AMD64_TOKEN = 0x000D, - IMAGE_REL_AMD64_SREL32 = 0x000E, - IMAGE_REL_AMD64_PAIR = 0x000F, - IMAGE_REL_AMD64_SSPAN32 = 0x0010 - }; - - enum RelocationTypesARM { - IMAGE_REL_ARM_ABSOLUTE = 0x0000, - IMAGE_REL_ARM_ADDR32 = 0x0001, - IMAGE_REL_ARM_ADDR32NB = 0x0002, - IMAGE_REL_ARM_BRANCH24 = 0x0003, - IMAGE_REL_ARM_BRANCH11 = 0x0004, - IMAGE_REL_ARM_TOKEN = 0x0005, - IMAGE_REL_ARM_BLX24 = 0x0008, - IMAGE_REL_ARM_BLX11 = 0x0009, - IMAGE_REL_ARM_SECTION = 0x000E, - IMAGE_REL_ARM_SECREL = 0x000F, - IMAGE_REL_ARM_MOV32A = 0x0010, - IMAGE_REL_ARM_MOV32T = 0x0011, - IMAGE_REL_ARM_BRANCH20T = 0x0012, - IMAGE_REL_ARM_BRANCH24T = 0x0014, - IMAGE_REL_ARM_BLX23T = 0x0015 - }; - - enum RelocationTypesARM64 { - IMAGE_REL_ARM64_ABSOLUTE = 0x0000, - IMAGE_REL_ARM64_ADDR32 = 0x0001, - IMAGE_REL_ARM64_ADDR32NB = 0x0002, - IMAGE_REL_ARM64_BRANCH26 = 0x0003, - IMAGE_REL_ARM64_PAGEBASE_REL2 = 0x0004, - IMAGE_REL_ARM64_REL21 = 0x0005, - IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006, - IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007, - IMAGE_REL_ARM64_SECREL = 0x0008, - IMAGE_REL_ARM64_SECREL_LOW12A = 0x0009, - IMAGE_REL_ARM64_SECREL_HIGH12A = 0x000A, - IMAGE_REL_ARM64_SECREL_LOW12L = 0x000B, - IMAGE_REL_ARM64_TOKEN = 0x000C, - IMAGE_REL_ARM64_SECTION = 0x000D, - IMAGE_REL_ARM64_ADDR64 = 0x000E, - IMAGE_REL_ARM64_BRANCH19 = 0x000F, - IMAGE_REL_ARM64_BRANCH14 = 0x0010, - }; - - enum COMDATType { - IMAGE_COMDAT_SELECT_NODUPLICATES = 1, - IMAGE_COMDAT_SELECT_ANY, - IMAGE_COMDAT_SELECT_SAME_SIZE, - IMAGE_COMDAT_SELECT_EXACT_MATCH, - IMAGE_COMDAT_SELECT_ASSOCIATIVE, - IMAGE_COMDAT_SELECT_LARGEST, - IMAGE_COMDAT_SELECT_NEWEST - }; - - // Auxiliary Symbol Formats - struct AuxiliaryFunctionDefinition { - uint32_t TagIndex; - uint32_t TotalSize; - uint32_t PointerToLinenumber; - uint32_t PointerToNextFunction; - char unused[2]; - }; - - struct AuxiliarybfAndefSymbol { - uint8_t unused1[4]; - uint16_t Linenumber; - uint8_t unused2[6]; - uint32_t PointerToNextFunction; - uint8_t unused3[2]; - }; - - struct AuxiliaryWeakExternal { - uint32_t TagIndex; - uint32_t Characteristics; - uint8_t unused[10]; - }; - - enum WeakExternalCharacteristics { - IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1, - IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2, - IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3 - }; - - struct AuxiliarySectionDefinition { - uint32_t Length; - uint16_t NumberOfRelocations; - uint16_t NumberOfLinenumbers; - uint32_t CheckSum; - uint32_t Number; - uint8_t Selection; - char unused; - }; - - struct AuxiliaryCLRToken { - uint8_t AuxType; - uint8_t unused1; - uint32_t SymbolTableIndex; - char unused2[12]; - }; - - union Auxiliary { - AuxiliaryFunctionDefinition FunctionDefinition; - AuxiliarybfAndefSymbol bfAndefSymbol; - AuxiliaryWeakExternal WeakExternal; - AuxiliarySectionDefinition SectionDefinition; - }; - - /// @brief The Import Directory Table. - /// - /// There is a single array of these and one entry per imported DLL. - struct ImportDirectoryTableEntry { - uint32_t ImportLookupTableRVA; - uint32_t TimeDateStamp; - uint32_t ForwarderChain; - uint32_t NameRVA; - uint32_t ImportAddressTableRVA; - }; - - /// @brief The PE32 Import Lookup Table. - /// - /// There is an array of these for each imported DLL. It represents either - /// the ordinal to import from the target DLL, or a name to lookup and import - /// from the target DLL. - /// - /// This also happens to be the same format used by the Import Address Table - /// when it is initially written out to the image. - struct ImportLookupTableEntry32 { - uint32_t data; - - /// @brief Is this entry specified by ordinal, or name? - bool isOrdinal() const { return data & 0x80000000; } - - /// @brief Get the ordinal value of this entry. isOrdinal must be true. - uint16_t getOrdinal() const { - assert(isOrdinal() && "ILT entry is not an ordinal!"); - return data & 0xFFFF; - } - - /// @brief Set the ordinal value and set isOrdinal to true. - void setOrdinal(uint16_t o) { - data = o; - data |= 0x80000000; - } - - /// @brief Get the Hint/Name entry RVA. isOrdinal must be false. - uint32_t getHintNameRVA() const { - assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!"); - return data; - } - - /// @brief Set the Hint/Name entry RVA and set isOrdinal to false. - void setHintNameRVA(uint32_t rva) { data = rva; } - }; - - /// @brief The DOS compatible header at the front of all PEs. - struct DOSHeader { - uint16_t Magic; - uint16_t UsedBytesInTheLastPage; - uint16_t FileSizeInPages; - uint16_t NumberOfRelocationItems; - uint16_t HeaderSizeInParagraphs; - uint16_t MinimumExtraParagraphs; - uint16_t MaximumExtraParagraphs; - uint16_t InitialRelativeSS; - uint16_t InitialSP; - uint16_t Checksum; - uint16_t InitialIP; - uint16_t InitialRelativeCS; - uint16_t AddressOfRelocationTable; - uint16_t OverlayNumber; - uint16_t Reserved[4]; - uint16_t OEMid; - uint16_t OEMinfo; - uint16_t Reserved2[10]; - uint32_t AddressOfNewExeHeader; - }; - - struct PE32Header { - enum { - PE32 = 0x10b, - PE32_PLUS = 0x20b - }; - - uint16_t Magic; - uint8_t MajorLinkerVersion; - uint8_t MinorLinkerVersion; - uint32_t SizeOfCode; - uint32_t SizeOfInitializedData; - uint32_t SizeOfUninitializedData; - uint32_t AddressOfEntryPoint; // RVA - uint32_t BaseOfCode; // RVA - uint32_t BaseOfData; // RVA - uint32_t ImageBase; - uint32_t SectionAlignment; - uint32_t FileAlignment; - uint16_t MajorOperatingSystemVersion; - uint16_t MinorOperatingSystemVersion; - uint16_t MajorImageVersion; - uint16_t MinorImageVersion; - uint16_t MajorSubsystemVersion; - uint16_t MinorSubsystemVersion; - uint32_t Win32VersionValue; - uint32_t SizeOfImage; - uint32_t SizeOfHeaders; - uint32_t CheckSum; - uint16_t Subsystem; - // FIXME: This should be DllCharacteristics to match the COFF spec. - uint16_t DLLCharacteristics; - uint32_t SizeOfStackReserve; - uint32_t SizeOfStackCommit; - uint32_t SizeOfHeapReserve; - uint32_t SizeOfHeapCommit; - uint32_t LoaderFlags; - // FIXME: This should be NumberOfRvaAndSizes to match the COFF spec. - uint32_t NumberOfRvaAndSize; - }; - - struct DataDirectory { - uint32_t RelativeVirtualAddress; - uint32_t Size; - }; - - enum DataDirectoryIndex { - EXPORT_TABLE = 0, - IMPORT_TABLE, - RESOURCE_TABLE, - EXCEPTION_TABLE, - CERTIFICATE_TABLE, - BASE_RELOCATION_TABLE, - DEBUG_DIRECTORY, - ARCHITECTURE, - GLOBAL_PTR, - TLS_TABLE, - LOAD_CONFIG_TABLE, - BOUND_IMPORT, - IAT, - DELAY_IMPORT_DESCRIPTOR, - CLR_RUNTIME_HEADER, - - NUM_DATA_DIRECTORIES - }; - - enum WindowsSubsystem { - IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem. - IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes - IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem. - IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem. - IMAGE_SUBSYSTEM_OS2_CUI = 5, ///< The OS/2 character subsytem. - IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem. - IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, ///< Native Windows 9x driver. - IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE. - IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application. - IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot - /// services. - IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time - /// services. - IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image. - IMAGE_SUBSYSTEM_XBOX = 14, ///< XBOX. - IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application. - }; - - enum DLLCharacteristics { - /// ASLR with 64 bit address space. - IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020, - /// DLL can be relocated at load time. - IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040, - /// Code integrity checks are enforced. - IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080, - ///< Image is NX compatible. - IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100, - /// Isolation aware, but do not isolate the image. - IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200, - /// Does not use structured exception handling (SEH). No SEH handler may be - /// called in this image. - IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400, - /// Do not bind the image. - IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800, - ///< Image should execute in an AppContainer. - IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000, - ///< A WDM driver. - IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000, - ///< Image supports Control Flow Guard. - IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000, - /// Terminal Server aware. - IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000 - }; - - enum DebugType { - IMAGE_DEBUG_TYPE_UNKNOWN = 0, - IMAGE_DEBUG_TYPE_COFF = 1, - IMAGE_DEBUG_TYPE_CODEVIEW = 2, - IMAGE_DEBUG_TYPE_FPO = 3, - IMAGE_DEBUG_TYPE_MISC = 4, - IMAGE_DEBUG_TYPE_EXCEPTION = 5, - IMAGE_DEBUG_TYPE_FIXUP = 6, - IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7, - IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8, - IMAGE_DEBUG_TYPE_BORLAND = 9, - IMAGE_DEBUG_TYPE_RESERVED10 = 10, - IMAGE_DEBUG_TYPE_CLSID = 11, - IMAGE_DEBUG_TYPE_VC_FEATURE = 12, - IMAGE_DEBUG_TYPE_POGO = 13, - IMAGE_DEBUG_TYPE_ILTCG = 14, - IMAGE_DEBUG_TYPE_MPX = 15, - IMAGE_DEBUG_TYPE_REPRO = 16, - }; - - enum BaseRelocationType { - IMAGE_REL_BASED_ABSOLUTE = 0, - IMAGE_REL_BASED_HIGH = 1, - IMAGE_REL_BASED_LOW = 2, - IMAGE_REL_BASED_HIGHLOW = 3, - IMAGE_REL_BASED_HIGHADJ = 4, - IMAGE_REL_BASED_MIPS_JMPADDR = 5, - IMAGE_REL_BASED_ARM_MOV32A = 5, - IMAGE_REL_BASED_ARM_MOV32T = 7, - IMAGE_REL_BASED_MIPS_JMPADDR16 = 9, - IMAGE_REL_BASED_DIR64 = 10 - }; - - enum ImportType { - IMPORT_CODE = 0, - IMPORT_DATA = 1, - IMPORT_CONST = 2 - }; - - enum ImportNameType { - /// Import is by ordinal. This indicates that the value in the Ordinal/Hint - /// field of the import header is the import's ordinal. If this constant is - /// not specified, then the Ordinal/Hint field should always be interpreted - /// as the import's hint. - IMPORT_ORDINAL = 0, - /// The import name is identical to the public symbol name - IMPORT_NAME = 1, - /// The import name is the public symbol name, but skipping the leading ?, - /// @, or optionally _. - IMPORT_NAME_NOPREFIX = 2, - /// The import name is the public symbol name, but skipping the leading ?, - /// @, or optionally _, and truncating at the first @. - IMPORT_NAME_UNDECORATE = 3 - }; - - struct ImportHeader { - uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). - uint16_t Sig2; ///< Must be 0xFFFF. - uint16_t Version; - uint16_t Machine; - uint32_t TimeDateStamp; - uint32_t SizeOfData; - uint16_t OrdinalHint; - uint16_t TypeInfo; - - ImportType getType() const { - return static_cast(TypeInfo & 0x3); - } - - ImportNameType getNameType() const { - return static_cast((TypeInfo & 0x1C) >> 2); - } - }; - - enum CodeViewIdentifiers { - DEBUG_SECTION_MAGIC = 0x4, - }; - - inline bool isReservedSectionNumber(int32_t SectionNumber) { - return SectionNumber <= 0; - } - -} // End namespace COFF. -} // End namespace llvm. - -#endif diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h index 89d2af052dc1..baa2a814e9a1 100644 --- a/include/llvm/Support/Casting.h +++ b/include/llvm/Support/Casting.h @@ -1,4 +1,4 @@ -//===-- llvm/Support/Casting.h - Allow flexible, checked, casts -*- C++ -*-===// +//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,6 +19,7 @@ #include "llvm/Support/type_traits.h" #include #include +#include namespace llvm { @@ -31,18 +32,19 @@ namespace llvm { // template selection process... the default implementation is a noop. // template struct simplify_type { - typedef From SimpleType; // The real type this represents... + using SimpleType = From; // The real type this represents... // An accessor to get the real value... static SimpleType &getSimplifiedValue(From &Val) { return Val; } }; template struct simplify_type { - typedef typename simplify_type::SimpleType NonConstSimpleType; - typedef typename add_const_past_pointer::type - SimpleType; - typedef typename add_lvalue_reference_if_not_pointer::type - RetType; + using NonConstSimpleType = typename simplify_type::SimpleType; + using SimpleType = + typename add_const_past_pointer::type; + using RetType = + typename add_lvalue_reference_if_not_pointer::type; + static RetType getSimplifiedValue(const From& Val) { return simplify_type::getSimplifiedValue(const_cast(Val)); } @@ -148,36 +150,35 @@ template LLVM_NODISCARD inline bool isa(const Y &Val) { template struct cast_retty; - // Calculate what type the 'cast' function should return, based on a requested // type of To and a source type of From. template struct cast_retty_impl { - typedef To& ret_type; // Normal case, return Ty& + using ret_type = To &; // Normal case, return Ty& }; template struct cast_retty_impl { - typedef const To &ret_type; // Normal case, return Ty& + using ret_type = const To &; // Normal case, return Ty& }; template struct cast_retty_impl { - typedef To* ret_type; // Pointer arg case, return Ty* + using ret_type = To *; // Pointer arg case, return Ty* }; template struct cast_retty_impl { - typedef const To* ret_type; // Constant pointer arg case, return const Ty* + using ret_type = const To *; // Constant pointer arg case, return const Ty* }; template struct cast_retty_impl { - typedef const To* ret_type; // Constant pointer arg case, return const Ty* + using ret_type = const To *; // Constant pointer arg case, return const Ty* }; template struct cast_retty_impl> { private: - typedef typename cast_retty_impl::ret_type PointerType; - typedef typename std::remove_pointer::type ResultType; + using PointerType = typename cast_retty_impl::ret_type; + using ResultType = typename std::remove_pointer::type; public: - typedef std::unique_ptr ret_type; + using ret_type = std::unique_ptr; }; template @@ -185,19 +186,19 @@ struct cast_retty_wrap { // When the simplified type and the from type are not the same, use the type // simplifier to reduce the type, then reuse cast_retty_impl to get the // resultant type. - typedef typename cast_retty::ret_type ret_type; + using ret_type = typename cast_retty::ret_type; }; template struct cast_retty_wrap { // When the simplified type is equal to the from type, use it directly. - typedef typename cast_retty_impl::ret_type ret_type; + using ret_type = typename cast_retty_impl::ret_type; }; template struct cast_retty { - typedef typename cast_retty_wrap::SimpleType>::ret_type ret_type; + using ret_type = typename cast_retty_wrap< + To, From, typename simplify_type::SimpleType>::ret_type; }; // Ensure the non-simple values are converted using the simplify_type template @@ -393,6 +394,6 @@ LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr &&Val) return unique_dyn_cast_or_null(Val); } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_CASTING_H diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index ae32e20d6dab..771b0a8c26a9 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -21,18 +21,19 @@ #define LLVM_SUPPORT_COMMANDLINE_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ namespace llvm { class StringSaver; +class raw_ostream; /// cl Namespace - This namespace contains all of the command line option /// processing machinery. It is intentionally a short name to make qualified @@ -64,12 +66,15 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, void ParseEnvironmentOptions(const char *progName, const char *envvar, const char *Overview = ""); +// Function pointer type for printing version information. +using VersionPrinterTy = std::function; + ///===---------------------------------------------------------------------===// /// SetVersionPrinter - Override the default (LLVM specific) version printer /// used to print out the version when --version is given /// on the command line. This allows other systems using the /// CommandLine utilities to print their own version string. -void SetVersionPrinter(void (*func)()); +void SetVersionPrinter(VersionPrinterTy func); ///===---------------------------------------------------------------------===// /// AddExtraVersionPrinter - Add an extra printer to use in addition to the @@ -78,7 +83,7 @@ void SetVersionPrinter(void (*func)()); /// which will be called after the basic LLVM version /// printing is complete. Each can then add additional /// information specific to the tool. -void AddExtraVersionPrinter(void (*func)()); +void AddExtraVersionPrinter(VersionPrinterTy func); // PrintOptionValues - Print option values. // With -print-options print the difference between option values and defaults. @@ -242,7 +247,7 @@ class Option { // Out of line virtual function to provide home for the class. virtual void anchor(); - int NumOccurrences; // The number of times specified + int NumOccurrences = 0; // The number of times specified // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid // problems with signed enums in bitfields. unsigned Occurrences : 3; // enum NumOccurrencesFlag @@ -252,8 +257,8 @@ class Option { unsigned HiddenFlag : 2; // enum OptionHidden unsigned Formatting : 2; // enum FormattingFlags unsigned Misc : 3; - unsigned Position; // Position of last occurrence of the option - unsigned AdditionalVals; // Greater than 0 for multi-valued option. + unsigned Position = 0; // Position of last occurrence of the option + unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option. public: StringRef ArgStr; // The argument string itself (ex: "help", "o") @@ -261,7 +266,7 @@ public: StringRef ValueStr; // String describing what the value of this option is OptionCategory *Category; // The Category this option belongs to SmallPtrSet Subs; // The subcommands this option belongs to. - bool FullyInitialized; // Has addArguemnt been called? + bool FullyInitialized = false; // Has addArguemnt been called? inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { return (enum NumOccurrencesFlag)Occurrences; @@ -316,10 +321,8 @@ public: protected: explicit Option(enum NumOccurrencesFlag OccurrencesFlag, enum OptionHidden Hidden) - : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0), - HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0), Position(0), - AdditionalVals(0), Category(&GeneralCategory), FullyInitialized(false) { - } + : Occurrences(OccurrencesFlag), Value(0), HiddenFlag(Hidden), + Formatting(NormalFormatting), Misc(0), Category(&GeneralCategory) {} inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; } @@ -447,8 +450,8 @@ struct GenericOptionValue { protected: GenericOptionValue() = default; GenericOptionValue(const GenericOptionValue&) = default; - ~GenericOptionValue() = default; GenericOptionValue &operator=(const GenericOptionValue &) = default; + ~GenericOptionValue() = default; private: virtual void anchor(); @@ -461,7 +464,7 @@ template struct OptionValue; template struct OptionValueBase : public GenericOptionValue { // Temporary storage for argument passing. - typedef OptionValue WrapperType; + using WrapperType = OptionValue; bool hasValue() const { return false; } @@ -487,8 +490,8 @@ template class OptionValueCopy : public GenericOptionValue { protected: OptionValueCopy(const OptionValueCopy&) = default; + OptionValueCopy &operator=(const OptionValueCopy &) = default; ~OptionValueCopy() = default; - OptionValueCopy &operator=(const OptionValueCopy&) = default; public: OptionValueCopy() = default; @@ -519,13 +522,13 @@ public: // Non-class option values. template struct OptionValueBase : OptionValueCopy { - typedef DataType WrapperType; + using WrapperType = DataType; protected: OptionValueBase() = default; OptionValueBase(const OptionValueBase&) = default; + OptionValueBase &operator=(const OptionValueBase &) = default; ~OptionValueBase() = default; - OptionValueBase &operator=(const OptionValueBase&) = default; }; // Top-level option class. @@ -548,7 +551,7 @@ enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; template <> struct OptionValue final : OptionValueCopy { - typedef cl::boolOrDefault WrapperType; + using WrapperType = cl::boolOrDefault; OptionValue() = default; @@ -565,7 +568,7 @@ private: template <> struct OptionValue final : OptionValueCopy { - typedef StringRef WrapperType; + using WrapperType = StringRef; OptionValue() = default; @@ -736,13 +739,15 @@ protected: public: OptionInfo(StringRef name, DataType v, StringRef helpStr) : GenericOptionInfo(name, helpStr), V(v) {} + OptionValue V; }; SmallVector Values; public: parser(Option &O) : generic_parser_base(O) {} - typedef DataType parser_data_type; + + using parser_data_type = DataType; // Implement virtual functions needed by generic_parser_base unsigned getNumOptions() const override { return unsigned(Values.size()); } @@ -837,10 +842,10 @@ protected: // template class basic_parser : public basic_parser_impl { public: - basic_parser(Option &O) : basic_parser_impl(O) {} + using parser_data_type = DataType; + using OptVal = OptionValue; - typedef DataType parser_data_type; - typedef OptionValue OptVal; + basic_parser(Option &O) : basic_parser_impl(O) {} protected: ~basic_parser() = default; @@ -1292,6 +1297,7 @@ class opt : public Option, enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1300,6 +1306,7 @@ class opt : public Option, size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1384,16 +1391,18 @@ template class list_storage { std::vector Storage; public: - typedef typename std::vector::iterator iterator; + using iterator = typename std::vector::iterator; iterator begin() { return Storage.begin(); } iterator end() { return Storage.end(); } - typedef typename std::vector::const_iterator const_iterator; + using const_iterator = typename std::vector::const_iterator; + const_iterator begin() const { return Storage.begin(); } const_iterator end() const { return Storage.end(); } - typedef typename std::vector::size_type size_type; + using size_type = typename std::vector::size_type; + size_type size() const { return Storage.size(); } bool empty() const { return Storage.empty(); } @@ -1401,8 +1410,9 @@ public: void push_back(const DataType &value) { Storage.push_back(value); } void push_back(DataType &&value) { Storage.push_back(value); } - typedef typename std::vector::reference reference; - typedef typename std::vector::const_reference const_reference; + using reference = typename std::vector::reference; + using const_reference = typename std::vector::const_reference; + reference operator[](size_type pos) { return Storage[pos]; } const_reference operator[](size_type pos) const { return Storage[pos]; } @@ -1453,6 +1463,7 @@ class list : public Option, public list_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1473,6 +1484,7 @@ class list : public Option, public list_storage { size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1592,6 +1604,7 @@ class bits : public Option, public bits_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1612,6 +1625,7 @@ class bits : public Option, public bits_storage { size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1824,9 +1838,9 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver, /// \brief String tokenization function type. Should be compatible with either /// Windows or Unix command line tokenizers. -typedef void (*TokenizerCallback)(StringRef Source, StringSaver &Saver, - SmallVectorImpl &NewArgv, - bool MarkEOLs); +using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs); /// \brief Expand response files on a command line recursively using the given /// StringSaver and tokenization strategy. Argv should contain the command line @@ -1880,6 +1894,7 @@ void ResetAllOptionOccurrences(); void ResetCommandLineParser(); } // end namespace cl + } // end namespace llvm #endif // LLVM_SUPPORT_COMMANDLINE_H diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h index f714c0ed997e..bd439f360216 100644 --- a/include/llvm/Support/ConvertUTF.h +++ b/include/llvm/Support/ConvertUTF.h @@ -90,8 +90,8 @@ #ifndef LLVM_SUPPORT_CONVERTUTF_H #define LLVM_SUPPORT_CONVERTUTF_H -#include #include +#include // Wrap everything in namespace llvm so that programs can link with llvm and // their own version of the unicode libraries. diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake index 541dbc3d635d..a58e2e454b7d 100644 --- a/include/llvm/Support/DataTypes.h.cmake +++ b/include/llvm/Support/DataTypes.h.cmake @@ -85,11 +85,11 @@ typedef u_int64_t uint64_t; #else /* _MSC_VER */ #ifdef __cplusplus -#include #include +#include #else -#include #include +#include #endif #include diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h index 06e089ffa166..f50d9b502daf 100644 --- a/include/llvm/Support/Endian.h +++ b/include/llvm/Support/Endian.h @@ -14,27 +14,36 @@ #ifndef LLVM_SUPPORT_ENDIAN_H #define LLVM_SUPPORT_ENDIAN_H +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" - -#include +#include +#include +#include +#include +#include namespace llvm { namespace support { + enum endianness {big, little, native}; // These are named values for common alignments. enum {aligned = 0, unaligned = 1}; namespace detail { - /// \brief ::value is either alignment, or alignof(T) if alignment is 0. - template - struct PickAlignment { - enum { value = alignment == 0 ? alignof(T) : alignment }; - }; + +/// \brief ::value is either alignment, or alignof(T) if alignment is 0. +template +struct PickAlignment { + enum { value = alignment == 0 ? alignof(T) : alignment }; +}; + } // end namespace detail namespace endian { + constexpr endianness system_endianness() { return sys::IsBigEndianHost ? big : little; } @@ -190,9 +199,11 @@ inline void writeAtBitAlignment(void *memory, value_type value, &val[0], sizeof(value_type) * 2); } } + } // end namespace endian namespace detail { + template @@ -254,77 +265,78 @@ public: } // end namespace detail -typedef detail::packed_endian_specific_integral - ulittle16_t; -typedef detail::packed_endian_specific_integral - ulittle32_t; -typedef detail::packed_endian_specific_integral - ulittle64_t; +using ulittle16_t = + detail::packed_endian_specific_integral; +using ulittle32_t = + detail::packed_endian_specific_integral; +using ulittle64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - little16_t; -typedef detail::packed_endian_specific_integral - little32_t; -typedef detail::packed_endian_specific_integral - little64_t; +using little16_t = + detail::packed_endian_specific_integral; +using little32_t = + detail::packed_endian_specific_integral; +using little64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - aligned_ulittle16_t; -typedef detail::packed_endian_specific_integral - aligned_ulittle32_t; -typedef detail::packed_endian_specific_integral - aligned_ulittle64_t; +using aligned_ulittle16_t = + detail::packed_endian_specific_integral; +using aligned_ulittle32_t = + detail::packed_endian_specific_integral; +using aligned_ulittle64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - aligned_little16_t; -typedef detail::packed_endian_specific_integral - aligned_little32_t; -typedef detail::packed_endian_specific_integral - aligned_little64_t; +using aligned_little16_t = + detail::packed_endian_specific_integral; +using aligned_little32_t = + detail::packed_endian_specific_integral; +using aligned_little64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - ubig16_t; -typedef detail::packed_endian_specific_integral - ubig32_t; -typedef detail::packed_endian_specific_integral - ubig64_t; +using ubig16_t = + detail::packed_endian_specific_integral; +using ubig32_t = + detail::packed_endian_specific_integral; +using ubig64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - big16_t; -typedef detail::packed_endian_specific_integral - big32_t; -typedef detail::packed_endian_specific_integral - big64_t; +using big16_t = + detail::packed_endian_specific_integral; +using big32_t = + detail::packed_endian_specific_integral; +using big64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - aligned_ubig16_t; -typedef detail::packed_endian_specific_integral - aligned_ubig32_t; -typedef detail::packed_endian_specific_integral - aligned_ubig64_t; +using aligned_ubig16_t = + detail::packed_endian_specific_integral; +using aligned_ubig32_t = + detail::packed_endian_specific_integral; +using aligned_ubig64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - aligned_big16_t; -typedef detail::packed_endian_specific_integral - aligned_big32_t; -typedef detail::packed_endian_specific_integral - aligned_big64_t; +using aligned_big16_t = + detail::packed_endian_specific_integral; +using aligned_big32_t = + detail::packed_endian_specific_integral; +using aligned_big64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - unaligned_uint16_t; -typedef detail::packed_endian_specific_integral - unaligned_uint32_t; -typedef detail::packed_endian_specific_integral - unaligned_uint64_t; +using unaligned_uint16_t = + detail::packed_endian_specific_integral; +using unaligned_uint32_t = + detail::packed_endian_specific_integral; +using unaligned_uint64_t = + detail::packed_endian_specific_integral; -typedef detail::packed_endian_specific_integral - unaligned_int16_t; -typedef detail::packed_endian_specific_integral - unaligned_int32_t; -typedef detail::packed_endian_specific_integral - unaligned_int64_t; +using unaligned_int16_t = + detail::packed_endian_specific_integral; +using unaligned_int32_t = + detail::packed_endian_specific_integral; +using unaligned_int64_t = + detail::packed_endian_specific_integral; namespace endian { + template inline T read(const void *P, endianness E) { return read(P, E); } @@ -394,8 +406,10 @@ inline void write64le(void *P, uint64_t V) { write64(P, V); } inline void write16be(void *P, uint16_t V) { write16(P, V); } inline void write32be(void *P, uint32_t V) { write32(P, V); } inline void write64be(void *P, uint64_t V) { write64(P, V); } + } // end namespace endian + } // end namespace support } // end namespace llvm -#endif +#endif // LLVM_SUPPORT_ENDIAN_H diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h index a3482f5a58b5..1e27e0b821f0 100644 --- a/include/llvm/Support/Error.h +++ b/include/llvm/Support/Error.h @@ -1,4 +1,4 @@ -//===----- llvm/Support/Error.h - Recoverable error handling ----*- C++ -*-===// +//===- llvm/Support/Error.h - Recoverable error handling --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,6 +22,7 @@ #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" #include @@ -167,7 +168,7 @@ class LLVM_NODISCARD Error { protected: /// Create a success value. Prefer using 'Error::success()' for readability - Error() : Payload(nullptr) { + Error() { setPtr(nullptr); setChecked(false); } @@ -182,7 +183,7 @@ public: /// Move-construct an error value. The newly constructed error is considered /// unchecked, even if the source error had been checked. The original error /// becomes a checked Success value, regardless of its original state. - Error(Error &&Other) : Payload(nullptr) { + Error(Error &&Other) { setChecked(true); *this = std::move(Other); } @@ -299,7 +300,7 @@ private: return Tmp; } - ErrorInfoBase *Payload; + ErrorInfoBase *Payload = nullptr; }; /// Subclass of Error for the sole purpose of identifying the success path in @@ -327,7 +328,6 @@ template Error make_error(ArgTs &&... Args) { template class ErrorInfo : public ParentErrT { public: - static const void *classID() { return &ThisErrT::ID; } const void *dynamicClassID() const override { return &ThisErrT::ID; } @@ -645,20 +645,22 @@ private: template class LLVM_NODISCARD Expected { template friend class ExpectedAsOutParameter; template friend class Expected; - static const bool isRef = std::is_reference::value; - typedef ReferenceStorage::type> wrap; - typedef std::unique_ptr error_type; + static const bool isRef = std::is_reference::value; + + using wrap = ReferenceStorage::type>; + + using error_type = std::unique_ptr; public: - typedef typename std::conditional::type storage_type; - typedef T value_type; + using storage_type = typename std::conditional::type; + using value_type = T; private: - typedef typename std::remove_reference::type &reference; - typedef const typename std::remove_reference::type &const_reference; - typedef typename std::remove_reference::type *pointer; - typedef const typename std::remove_reference::type *const_pointer; + using reference = typename std::remove_reference::type &; + using const_reference = const typename std::remove_reference::type &; + using pointer = typename std::remove_reference::type *; + using const_pointer = const typename std::remove_reference::type *; public: /// Create an Expected error value from the given Error. @@ -891,7 +893,6 @@ private: template class ExpectedAsOutParameter { public: - ExpectedAsOutParameter(Expected *ValOrErr) : ValOrErr(ValOrErr) { if (ValOrErr) diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h index 877f4063cd23..061fb65db465 100644 --- a/include/llvm/Support/ErrorOr.h +++ b/include/llvm/Support/ErrorOr.h @@ -16,13 +16,14 @@ #ifndef LLVM_SUPPORT_ERROROR_H #define LLVM_SUPPORT_ERROROR_H -#include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/AlignOf.h" #include #include #include +#include namespace llvm { + /// \brief Stores a reference that can be changed. template class ReferenceStorage { @@ -67,17 +68,19 @@ public: template class ErrorOr { template friend class ErrorOr; + static const bool isRef = std::is_reference::value; - typedef ReferenceStorage::type> wrap; + + using wrap = ReferenceStorage::type>; public: - typedef typename std::conditional::type storage_type; + using storage_type = typename std::conditional::type; private: - typedef typename std::remove_reference::type &reference; - typedef const typename std::remove_reference::type &const_reference; - typedef typename std::remove_reference::type *pointer; - typedef const typename std::remove_reference::type *const_pointer; + using reference = typename std::remove_reference::type &; + using const_reference = const typename std::remove_reference::type &; + using pointer = typename std::remove_reference::type *; + using const_pointer = const typename std::remove_reference::type *; public: template @@ -282,6 +285,7 @@ typename std::enable_if::value || operator==(const ErrorOr &Err, E Code) { return Err.getError() == Code; } + } // end namespace llvm #endif // LLVM_SUPPORT_ERROROR_H diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 7caefb5359b8..21c5fcdb7145 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -233,50 +233,6 @@ public: void permissions(perms p) { Perms = p; } }; -/// file_magic - An "enum class" enumeration of file types based on magic (the first -/// N bytes of the file). -struct file_magic { - enum Impl { - unknown = 0, ///< Unrecognized file - bitcode, ///< Bitcode file - archive, ///< ar style archive file - elf, ///< ELF Unknown type - elf_relocatable, ///< ELF Relocatable object file - elf_executable, ///< ELF Executable image - elf_shared_object, ///< ELF dynamically linked shared lib - elf_core, ///< ELF core image - macho_object, ///< Mach-O Object file - macho_executable, ///< Mach-O Executable - macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM - macho_core, ///< Mach-O Core File - macho_preload_executable, ///< Mach-O Preloaded Executable - macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib - macho_dynamic_linker, ///< The Mach-O dynamic linker - macho_bundle, ///< Mach-O Bundle file - macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub - macho_dsym_companion, ///< Mach-O dSYM companion file - macho_kext_bundle, ///< Mach-O kext bundle file - macho_universal_binary, ///< Mach-O universal binary - coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file - coff_object, ///< COFF object file - coff_import_library, ///< COFF import library - pecoff_executable, ///< PECOFF executable file - windows_resource, ///< Windows compiled resource file (.res) - wasm_object ///< WebAssembly Object file - }; - - bool is_object() const { - return V != unknown; - } - - file_magic() = default; - file_magic(Impl V) : V(V) {} - operator Impl() const { return V; } - -private: - Impl V = unknown; -}; - /// @} /// @name Physical Operators /// @{ @@ -770,17 +726,6 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD, std::error_code openFileForRead(const Twine &Name, int &ResultFD, SmallVectorImpl *RealPath = nullptr); -/// @brief Identify the type of a binary file based on how magical it is. -file_magic identify_magic(StringRef magic); - -/// @brief Get and identify \a path's type based on its content. -/// -/// @param path Input path. -/// @param result Set to the type of file, or file_magic::unknown. -/// @returns errc::success if result has been successfully set, otherwise a -/// platform-specific error_code. -std::error_code identify_magic(const Twine &path, file_magic &result); - std::error_code getUniqueID(const Twine Path, UniqueID &Result); /// @brief Get disk space usage information. diff --git a/include/llvm/Support/FormatVariadic.h b/include/llvm/Support/FormatVariadic.h index 3a4668687cc9..c1153e84dfb5 100644 --- a/include/llvm/Support/FormatVariadic.h +++ b/include/llvm/Support/FormatVariadic.h @@ -27,8 +27,8 @@ #define LLVM_SUPPORT_FORMATVARIADIC_H #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatCommon.h" #include "llvm/Support/FormatProviders.h" diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h index 73fddca8e35b..268c53c50252 100644 --- a/include/llvm/Support/GCOV.h +++ b/include/llvm/Support/GCOV.h @@ -16,12 +16,12 @@ #define LLVM_SUPPORT_GCOV_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h index 851ff7d80403..80a2dfcbad88 100644 --- a/include/llvm/Support/GenericDomTree.h +++ b/include/llvm/Support/GenericDomTree.h @@ -26,9 +26,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/include/llvm/Support/LowLevelTypeImpl.h b/include/llvm/Support/LowLevelTypeImpl.h index e18e58b7b5b2..c79dd0c29507 100644 --- a/include/llvm/Support/LowLevelTypeImpl.h +++ b/include/llvm/Support/LowLevelTypeImpl.h @@ -27,9 +27,9 @@ #ifndef LLVM_SUPPORT_LOWLEVELTYPEIMPL_H #define LLVM_SUPPORT_LOWLEVELTYPEIMPL_H -#include #include "llvm/ADT/DenseMapInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include namespace llvm { diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h deleted file mode 100644 index 3d704292c260..000000000000 --- a/include/llvm/Support/MachO.h +++ /dev/null @@ -1,2038 +0,0 @@ -//===-- llvm/Support/MachO.h - The MachO file format ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines manifest constants for the MachO object file format. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_MACHO_H -#define LLVM_SUPPORT_MACHO_H - -#include "llvm/Support/Compiler.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Host.h" - -namespace llvm { - namespace MachO { - // Enums from - enum : uint32_t { - // Constants for the "magic" field in llvm::MachO::mach_header and - // llvm::MachO::mach_header_64 - MH_MAGIC = 0xFEEDFACEu, - MH_CIGAM = 0xCEFAEDFEu, - MH_MAGIC_64 = 0xFEEDFACFu, - MH_CIGAM_64 = 0xCFFAEDFEu, - FAT_MAGIC = 0xCAFEBABEu, - FAT_CIGAM = 0xBEBAFECAu, - FAT_MAGIC_64 = 0xCAFEBABFu, - FAT_CIGAM_64 = 0xBFBAFECAu - }; - - enum HeaderFileType { - // Constants for the "filetype" field in llvm::MachO::mach_header and - // llvm::MachO::mach_header_64 - MH_OBJECT = 0x1u, - MH_EXECUTE = 0x2u, - MH_FVMLIB = 0x3u, - MH_CORE = 0x4u, - MH_PRELOAD = 0x5u, - MH_DYLIB = 0x6u, - MH_DYLINKER = 0x7u, - MH_BUNDLE = 0x8u, - MH_DYLIB_STUB = 0x9u, - MH_DSYM = 0xAu, - MH_KEXT_BUNDLE = 0xBu - }; - - enum { - // Constant bits for the "flags" field in llvm::MachO::mach_header and - // llvm::MachO::mach_header_64 - MH_NOUNDEFS = 0x00000001u, - MH_INCRLINK = 0x00000002u, - MH_DYLDLINK = 0x00000004u, - MH_BINDATLOAD = 0x00000008u, - MH_PREBOUND = 0x00000010u, - MH_SPLIT_SEGS = 0x00000020u, - MH_LAZY_INIT = 0x00000040u, - MH_TWOLEVEL = 0x00000080u, - MH_FORCE_FLAT = 0x00000100u, - MH_NOMULTIDEFS = 0x00000200u, - MH_NOFIXPREBINDING = 0x00000400u, - MH_PREBINDABLE = 0x00000800u, - MH_ALLMODSBOUND = 0x00001000u, - MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000u, - MH_CANONICAL = 0x00004000u, - MH_WEAK_DEFINES = 0x00008000u, - MH_BINDS_TO_WEAK = 0x00010000u, - MH_ALLOW_STACK_EXECUTION = 0x00020000u, - MH_ROOT_SAFE = 0x00040000u, - MH_SETUID_SAFE = 0x00080000u, - MH_NO_REEXPORTED_DYLIBS = 0x00100000u, - MH_PIE = 0x00200000u, - MH_DEAD_STRIPPABLE_DYLIB = 0x00400000u, - MH_HAS_TLV_DESCRIPTORS = 0x00800000u, - MH_NO_HEAP_EXECUTION = 0x01000000u, - MH_APP_EXTENSION_SAFE = 0x02000000u - }; - - enum : uint32_t { - // Flags for the "cmd" field in llvm::MachO::load_command - LC_REQ_DYLD = 0x80000000u - }; - -#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ - LCName = LCValue, - - enum LoadCommandType : uint32_t { - #include "llvm/Support/MachO.def" - }; - -#undef HANDLE_LOAD_COMMAND - - enum : uint32_t { - // Constant bits for the "flags" field in llvm::MachO::segment_command - SG_HIGHVM = 0x1u, - SG_FVMLIB = 0x2u, - SG_NORELOC = 0x4u, - SG_PROTECTED_VERSION_1 = 0x8u, - - // Constant masks for the "flags" field in llvm::MachO::section and - // llvm::MachO::section_64 - SECTION_TYPE = 0x000000ffu, // SECTION_TYPE - SECTION_ATTRIBUTES = 0xffffff00u, // SECTION_ATTRIBUTES - SECTION_ATTRIBUTES_USR = 0xff000000u, // SECTION_ATTRIBUTES_USR - SECTION_ATTRIBUTES_SYS = 0x00ffff00u // SECTION_ATTRIBUTES_SYS - }; - - /// These are the section type and attributes fields. A MachO section can - /// have only one Type, but can have any of the attributes specified. - enum SectionType : uint32_t { - // Constant masks for the "flags[7:0]" field in llvm::MachO::section and - // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE) - - /// S_REGULAR - Regular section. - S_REGULAR = 0x00u, - /// S_ZEROFILL - Zero fill on demand section. - S_ZEROFILL = 0x01u, - /// S_CSTRING_LITERALS - Section with literal C strings. - S_CSTRING_LITERALS = 0x02u, - /// S_4BYTE_LITERALS - Section with 4 byte literals. - S_4BYTE_LITERALS = 0x03u, - /// S_8BYTE_LITERALS - Section with 8 byte literals. - S_8BYTE_LITERALS = 0x04u, - /// S_LITERAL_POINTERS - Section with pointers to literals. - S_LITERAL_POINTERS = 0x05u, - /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers. - S_NON_LAZY_SYMBOL_POINTERS = 0x06u, - /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers. - S_LAZY_SYMBOL_POINTERS = 0x07u, - /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in - /// the Reserved2 field. - S_SYMBOL_STUBS = 0x08u, - /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for - /// initialization. - S_MOD_INIT_FUNC_POINTERS = 0x09u, - /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for - /// termination. - S_MOD_TERM_FUNC_POINTERS = 0x0au, - /// S_COALESCED - Section contains symbols that are to be coalesced. - S_COALESCED = 0x0bu, - /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4 - /// gigabytes). - S_GB_ZEROFILL = 0x0cu, - /// S_INTERPOSING - Section with only pairs of function pointers for - /// interposing. - S_INTERPOSING = 0x0du, - /// S_16BYTE_LITERALS - Section with only 16 byte literals. - S_16BYTE_LITERALS = 0x0eu, - /// S_DTRACE_DOF - Section contains DTrace Object Format. - S_DTRACE_DOF = 0x0fu, - /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to - /// lazy loaded dylibs. - S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10u, - /// S_THREAD_LOCAL_REGULAR - Thread local data section. - S_THREAD_LOCAL_REGULAR = 0x11u, - /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section. - S_THREAD_LOCAL_ZEROFILL = 0x12u, - /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable - /// structure data. - S_THREAD_LOCAL_VARIABLES = 0x13u, - /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with pointers to thread - /// local structures. - S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14u, - /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local - /// variable initialization pointers to functions. - S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u, - - LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - }; - - enum : uint32_t { - // Constant masks for the "flags[31:24]" field in llvm::MachO::section and - // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR) - - /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine - /// instructions. - S_ATTR_PURE_INSTRUCTIONS = 0x80000000u, - /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be - /// in a ranlib table of contents. - S_ATTR_NO_TOC = 0x40000000u, - /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section - /// in files with the MY_DYLDLINK flag. - S_ATTR_STRIP_STATIC_SYMS = 0x20000000u, - /// S_ATTR_NO_DEAD_STRIP - No dead stripping. - S_ATTR_NO_DEAD_STRIP = 0x10000000u, - /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks. - S_ATTR_LIVE_SUPPORT = 0x08000000u, - /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by - /// dyld. - S_ATTR_SELF_MODIFYING_CODE = 0x04000000u, - /// S_ATTR_DEBUG - A debug section. - S_ATTR_DEBUG = 0x02000000u, - - // Constant masks for the "flags[23:8]" field in llvm::MachO::section and - // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS) - - /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions. - S_ATTR_SOME_INSTRUCTIONS = 0x00000400u, - /// S_ATTR_EXT_RELOC - Section has external relocation entries. - S_ATTR_EXT_RELOC = 0x00000200u, - /// S_ATTR_LOC_RELOC - Section has local relocation entries. - S_ATTR_LOC_RELOC = 0x00000100u, - - // Constant masks for the value of an indirect symbol in an indirect - // symbol table - INDIRECT_SYMBOL_LOCAL = 0x80000000u, - INDIRECT_SYMBOL_ABS = 0x40000000u - }; - - enum DataRegionType { - // Constants for the "kind" field in a data_in_code_entry structure - DICE_KIND_DATA = 1u, - DICE_KIND_JUMP_TABLE8 = 2u, - DICE_KIND_JUMP_TABLE16 = 3u, - DICE_KIND_JUMP_TABLE32 = 4u, - DICE_KIND_ABS_JUMP_TABLE32 = 5u - }; - - enum RebaseType { - REBASE_TYPE_POINTER = 1u, - REBASE_TYPE_TEXT_ABSOLUTE32 = 2u, - REBASE_TYPE_TEXT_PCREL32 = 3u - }; - - enum { - REBASE_OPCODE_MASK = 0xF0u, - REBASE_IMMEDIATE_MASK = 0x0Fu - }; - - enum RebaseOpcode { - REBASE_OPCODE_DONE = 0x00u, - REBASE_OPCODE_SET_TYPE_IMM = 0x10u, - REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x20u, - REBASE_OPCODE_ADD_ADDR_ULEB = 0x30u, - REBASE_OPCODE_ADD_ADDR_IMM_SCALED = 0x40u, - REBASE_OPCODE_DO_REBASE_IMM_TIMES = 0x50u, - REBASE_OPCODE_DO_REBASE_ULEB_TIMES = 0x60u, - REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB = 0x70u, - REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80u - }; - - enum BindType { - BIND_TYPE_POINTER = 1u, - BIND_TYPE_TEXT_ABSOLUTE32 = 2u, - BIND_TYPE_TEXT_PCREL32 = 3u - }; - - enum BindSpecialDylib { - BIND_SPECIAL_DYLIB_SELF = 0, - BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1, - BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2 - }; - - enum { - BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x1u, - BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8u, - - BIND_OPCODE_MASK = 0xF0u, - BIND_IMMEDIATE_MASK = 0x0Fu - }; - - enum BindOpcode { - BIND_OPCODE_DONE = 0x00u, - BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10u, - BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20u, - BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30u, - BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40u, - BIND_OPCODE_SET_TYPE_IMM = 0x50u, - BIND_OPCODE_SET_ADDEND_SLEB = 0x60u, - BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70u, - BIND_OPCODE_ADD_ADDR_ULEB = 0x80u, - BIND_OPCODE_DO_BIND = 0x90u, - BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0u, - BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0u, - BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0u - }; - - enum { - EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u, - EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u, - EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u, - EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u - }; - - enum ExportSymbolKind { - EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u, - EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u, - EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u - }; - - enum { - // Constant masks for the "n_type" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 - N_STAB = 0xe0, - N_PEXT = 0x10, - N_TYPE = 0x0e, - N_EXT = 0x01 - }; - - enum NListType : uint8_t { - // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and - // llvm::MachO::nlist_64 - N_UNDF = 0x0u, - N_ABS = 0x2u, - N_SECT = 0xeu, - N_PBUD = 0xcu, - N_INDR = 0xau - }; - - enum SectionOrdinal { - // Constants for the "n_sect" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 - NO_SECT = 0u, - MAX_SECT = 0xffu - }; - - enum { - // Constant masks for the "n_desc" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 - // The low 3 bits are the for the REFERENCE_TYPE. - REFERENCE_TYPE = 0x7, - REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, - REFERENCE_FLAG_UNDEFINED_LAZY = 1, - REFERENCE_FLAG_DEFINED = 2, - REFERENCE_FLAG_PRIVATE_DEFINED = 3, - REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, - REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5, - // Flag bits (some overlap with the library ordinal bits). - N_ARM_THUMB_DEF = 0x0008u, - REFERENCED_DYNAMICALLY = 0x0010u, - N_NO_DEAD_STRIP = 0x0020u, - N_WEAK_REF = 0x0040u, - N_WEAK_DEF = 0x0080u, - N_SYMBOL_RESOLVER = 0x0100u, - N_ALT_ENTRY = 0x0200u, - // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL() - // as these are in the top 8 bits. - SELF_LIBRARY_ORDINAL = 0x0, - MAX_LIBRARY_ORDINAL = 0xfd, - DYNAMIC_LOOKUP_ORDINAL = 0xfe, - EXECUTABLE_ORDINAL = 0xff - }; - - enum StabType { - // Constant values for the "n_type" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 when "(n_type & N_STAB) != 0" - N_GSYM = 0x20u, - N_FNAME = 0x22u, - N_FUN = 0x24u, - N_STSYM = 0x26u, - N_LCSYM = 0x28u, - N_BNSYM = 0x2Eu, - N_PC = 0x30u, - N_AST = 0x32u, - N_OPT = 0x3Cu, - N_RSYM = 0x40u, - N_SLINE = 0x44u, - N_ENSYM = 0x4Eu, - N_SSYM = 0x60u, - N_SO = 0x64u, - N_OSO = 0x66u, - N_LSYM = 0x80u, - N_BINCL = 0x82u, - N_SOL = 0x84u, - N_PARAMS = 0x86u, - N_VERSION = 0x88u, - N_OLEVEL = 0x8Au, - N_PSYM = 0xA0u, - N_EINCL = 0xA2u, - N_ENTRY = 0xA4u, - N_LBRAC = 0xC0u, - N_EXCL = 0xC2u, - N_RBRAC = 0xE0u, - N_BCOMM = 0xE2u, - N_ECOMM = 0xE4u, - N_ECOML = 0xE8u, - N_LENG = 0xFEu - }; - - enum : uint32_t { - // Constant values for the r_symbolnum field in an - // llvm::MachO::relocation_info structure when r_extern is 0. - R_ABS = 0, - - // Constant bits for the r_address field in an - // llvm::MachO::relocation_info structure. - R_SCATTERED = 0x80000000 - }; - - enum RelocationInfoType { - // Constant values for the r_type field in an - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - GENERIC_RELOC_VANILLA = 0, - GENERIC_RELOC_PAIR = 1, - GENERIC_RELOC_SECTDIFF = 2, - GENERIC_RELOC_PB_LA_PTR = 3, - GENERIC_RELOC_LOCAL_SECTDIFF = 4, - GENERIC_RELOC_TLV = 5, - - // Constant values for the r_type field in a PowerPC architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - PPC_RELOC_VANILLA = GENERIC_RELOC_VANILLA, - PPC_RELOC_PAIR = GENERIC_RELOC_PAIR, - PPC_RELOC_BR14 = 2, - PPC_RELOC_BR24 = 3, - PPC_RELOC_HI16 = 4, - PPC_RELOC_LO16 = 5, - PPC_RELOC_HA16 = 6, - PPC_RELOC_LO14 = 7, - PPC_RELOC_SECTDIFF = 8, - PPC_RELOC_PB_LA_PTR = 9, - PPC_RELOC_HI16_SECTDIFF = 10, - PPC_RELOC_LO16_SECTDIFF = 11, - PPC_RELOC_HA16_SECTDIFF = 12, - PPC_RELOC_JBSR = 13, - PPC_RELOC_LO14_SECTDIFF = 14, - PPC_RELOC_LOCAL_SECTDIFF = 15, - - // Constant values for the r_type field in an ARM architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - ARM_RELOC_VANILLA = GENERIC_RELOC_VANILLA, - ARM_RELOC_PAIR = GENERIC_RELOC_PAIR, - ARM_RELOC_SECTDIFF = GENERIC_RELOC_SECTDIFF, - ARM_RELOC_LOCAL_SECTDIFF = 3, - ARM_RELOC_PB_LA_PTR = 4, - ARM_RELOC_BR24 = 5, - ARM_THUMB_RELOC_BR22 = 6, - ARM_THUMB_32BIT_BRANCH = 7, // obsolete - ARM_RELOC_HALF = 8, - ARM_RELOC_HALF_SECTDIFF = 9, - - // Constant values for the r_type field in an ARM64 architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - - // For pointers. - ARM64_RELOC_UNSIGNED = 0, - // Must be followed by an ARM64_RELOC_UNSIGNED - ARM64_RELOC_SUBTRACTOR = 1, - // A B/BL instruction with 26-bit displacement. - ARM64_RELOC_BRANCH26 = 2, - // PC-rel distance to page of target. - ARM64_RELOC_PAGE21 = 3, - // Offset within page, scaled by r_length. - ARM64_RELOC_PAGEOFF12 = 4, - // PC-rel distance to page of GOT slot. - ARM64_RELOC_GOT_LOAD_PAGE21 = 5, - // Offset within page of GOT slot, scaled by r_length. - ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6, - // For pointers to GOT slots. - ARM64_RELOC_POINTER_TO_GOT = 7, - // PC-rel distance to page of TLVP slot. - ARM64_RELOC_TLVP_LOAD_PAGE21 = 8, - // Offset within page of TLVP slot, scaled by r_length. - ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9, - // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. - ARM64_RELOC_ADDEND = 10, - - // Constant values for the r_type field in an x86_64 architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure - X86_64_RELOC_UNSIGNED = 0, - X86_64_RELOC_SIGNED = 1, - X86_64_RELOC_BRANCH = 2, - X86_64_RELOC_GOT_LOAD = 3, - X86_64_RELOC_GOT = 4, - X86_64_RELOC_SUBTRACTOR = 5, - X86_64_RELOC_SIGNED_1 = 6, - X86_64_RELOC_SIGNED_2 = 7, - X86_64_RELOC_SIGNED_4 = 8, - X86_64_RELOC_TLV = 9 - }; - - // Values for segment_command.initprot. - // From - enum { - VM_PROT_READ = 0x1, - VM_PROT_WRITE = 0x2, - VM_PROT_EXECUTE = 0x4 - }; - - // Values for platform field in build_version_command. - enum { - PLATFORM_MACOS = 1, - PLATFORM_IOS = 2, - PLATFORM_TVOS = 3, - PLATFORM_WATCHOS = 4, - PLATFORM_BRIDGEOS = 5 - }; - - // Values for tools enum in build_tool_version. - enum { - TOOL_CLANG = 1, - TOOL_SWIFT = 2, - TOOL_LD = 3 - }; - - // Structs from - - struct mach_header { - uint32_t magic; - uint32_t cputype; - uint32_t cpusubtype; - uint32_t filetype; - uint32_t ncmds; - uint32_t sizeofcmds; - uint32_t flags; - }; - - struct mach_header_64 { - uint32_t magic; - uint32_t cputype; - uint32_t cpusubtype; - uint32_t filetype; - uint32_t ncmds; - uint32_t sizeofcmds; - uint32_t flags; - uint32_t reserved; - }; - - struct load_command { - uint32_t cmd; - uint32_t cmdsize; - }; - - struct segment_command { - uint32_t cmd; - uint32_t cmdsize; - char segname[16]; - uint32_t vmaddr; - uint32_t vmsize; - uint32_t fileoff; - uint32_t filesize; - uint32_t maxprot; - uint32_t initprot; - uint32_t nsects; - uint32_t flags; - }; - - struct segment_command_64 { - uint32_t cmd; - uint32_t cmdsize; - char segname[16]; - uint64_t vmaddr; - uint64_t vmsize; - uint64_t fileoff; - uint64_t filesize; - uint32_t maxprot; - uint32_t initprot; - uint32_t nsects; - uint32_t flags; - }; - - struct section { - char sectname[16]; - char segname[16]; - uint32_t addr; - uint32_t size; - uint32_t offset; - uint32_t align; - uint32_t reloff; - uint32_t nreloc; - uint32_t flags; - uint32_t reserved1; - uint32_t reserved2; - }; - - struct section_64 { - char sectname[16]; - char segname[16]; - uint64_t addr; - uint64_t size; - uint32_t offset; - uint32_t align; - uint32_t reloff; - uint32_t nreloc; - uint32_t flags; - uint32_t reserved1; - uint32_t reserved2; - uint32_t reserved3; - }; - - struct fvmlib { - uint32_t name; - uint32_t minor_version; - uint32_t header_addr; - }; - - // The fvmlib_command is obsolete and no longer supported. - struct fvmlib_command { - uint32_t cmd; - uint32_t cmdsize; - struct fvmlib fvmlib; - }; - - struct dylib { - uint32_t name; - uint32_t timestamp; - uint32_t current_version; - uint32_t compatibility_version; - }; - - struct dylib_command { - uint32_t cmd; - uint32_t cmdsize; - struct dylib dylib; - }; - - struct sub_framework_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t umbrella; - }; - - struct sub_client_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t client; - }; - - struct sub_umbrella_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t sub_umbrella; - }; - - struct sub_library_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t sub_library; - }; - - // The prebound_dylib_command is obsolete and no longer supported. - struct prebound_dylib_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t name; - uint32_t nmodules; - uint32_t linked_modules; - }; - - struct dylinker_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t name; - }; - - struct thread_command { - uint32_t cmd; - uint32_t cmdsize; - }; - - struct routines_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t init_address; - uint32_t init_module; - uint32_t reserved1; - uint32_t reserved2; - uint32_t reserved3; - uint32_t reserved4; - uint32_t reserved5; - uint32_t reserved6; - }; - - struct routines_command_64 { - uint32_t cmd; - uint32_t cmdsize; - uint64_t init_address; - uint64_t init_module; - uint64_t reserved1; - uint64_t reserved2; - uint64_t reserved3; - uint64_t reserved4; - uint64_t reserved5; - uint64_t reserved6; - }; - - struct symtab_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t symoff; - uint32_t nsyms; - uint32_t stroff; - uint32_t strsize; - }; - - struct dysymtab_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t ilocalsym; - uint32_t nlocalsym; - uint32_t iextdefsym; - uint32_t nextdefsym; - uint32_t iundefsym; - uint32_t nundefsym; - uint32_t tocoff; - uint32_t ntoc; - uint32_t modtaboff; - uint32_t nmodtab; - uint32_t extrefsymoff; - uint32_t nextrefsyms; - uint32_t indirectsymoff; - uint32_t nindirectsyms; - uint32_t extreloff; - uint32_t nextrel; - uint32_t locreloff; - uint32_t nlocrel; - }; - - struct dylib_table_of_contents { - uint32_t symbol_index; - uint32_t module_index; - }; - - struct dylib_module { - uint32_t module_name; - uint32_t iextdefsym; - uint32_t nextdefsym; - uint32_t irefsym; - uint32_t nrefsym; - uint32_t ilocalsym; - uint32_t nlocalsym; - uint32_t iextrel; - uint32_t nextrel; - uint32_t iinit_iterm; - uint32_t ninit_nterm; - uint32_t objc_module_info_addr; - uint32_t objc_module_info_size; - }; - - struct dylib_module_64 { - uint32_t module_name; - uint32_t iextdefsym; - uint32_t nextdefsym; - uint32_t irefsym; - uint32_t nrefsym; - uint32_t ilocalsym; - uint32_t nlocalsym; - uint32_t iextrel; - uint32_t nextrel; - uint32_t iinit_iterm; - uint32_t ninit_nterm; - uint32_t objc_module_info_size; - uint64_t objc_module_info_addr; - }; - - struct dylib_reference { - uint32_t isym:24, - flags:8; - }; - - // The twolevel_hints_command is obsolete and no longer supported. - struct twolevel_hints_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t offset; - uint32_t nhints; - }; - - // The twolevel_hints_command is obsolete and no longer supported. - struct twolevel_hint { - uint32_t isub_image:8, - itoc:24; - }; - - // The prebind_cksum_command is obsolete and no longer supported. - struct prebind_cksum_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t cksum; - }; - - struct uuid_command { - uint32_t cmd; - uint32_t cmdsize; - uint8_t uuid[16]; - }; - - struct rpath_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t path; - }; - - struct linkedit_data_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t dataoff; - uint32_t datasize; - }; - - struct data_in_code_entry { - uint32_t offset; - uint16_t length; - uint16_t kind; - }; - - struct source_version_command { - uint32_t cmd; - uint32_t cmdsize; - uint64_t version; - }; - - struct encryption_info_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t cryptoff; - uint32_t cryptsize; - uint32_t cryptid; - }; - - struct encryption_info_command_64 { - uint32_t cmd; - uint32_t cmdsize; - uint32_t cryptoff; - uint32_t cryptsize; - uint32_t cryptid; - uint32_t pad; - }; - - struct version_min_command { - uint32_t cmd; // LC_VERSION_MIN_MACOSX or - // LC_VERSION_MIN_IPHONEOS - uint32_t cmdsize; // sizeof(struct version_min_command) - uint32_t version; // X.Y.Z is encoded in nibbles xxxx.yy.zz - uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz - }; - - struct note_command { - uint32_t cmd; // LC_NOTE - uint32_t cmdsize; // sizeof(struct note_command) - char data_owner[16]; // owner name for this LC_NOTE - uint64_t offset; // file offset of this data - uint64_t size; // length of data region - }; - - struct build_tool_version { - uint32_t tool; // enum for the tool - uint32_t version; // version of the tool - }; - - struct build_version_command { - uint32_t cmd; // LC_BUILD_VERSION - uint32_t cmdsize; // sizeof(struct build_version_command) + - // ntools * sizeof(struct build_tool_version) - uint32_t platform; // platform - uint32_t minos; // X.Y.Z is encoded in nibbles xxxx.yy.zz - uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz - uint32_t ntools; // number of tool entries following this - }; - - struct dyld_info_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t rebase_off; - uint32_t rebase_size; - uint32_t bind_off; - uint32_t bind_size; - uint32_t weak_bind_off; - uint32_t weak_bind_size; - uint32_t lazy_bind_off; - uint32_t lazy_bind_size; - uint32_t export_off; - uint32_t export_size; - }; - - struct linker_option_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t count; - }; - - // The symseg_command is obsolete and no longer supported. - struct symseg_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t offset; - uint32_t size; - }; - - // The ident_command is obsolete and no longer supported. - struct ident_command { - uint32_t cmd; - uint32_t cmdsize; - }; - - // The fvmfile_command is obsolete and no longer supported. - struct fvmfile_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t name; - uint32_t header_addr; - }; - - struct tlv_descriptor_32 { - uint32_t thunk; - uint32_t key; - uint32_t offset; - }; - - struct tlv_descriptor_64 { - uint64_t thunk; - uint64_t key; - uint64_t offset; - }; - - struct tlv_descriptor { - uintptr_t thunk; - uintptr_t key; - uintptr_t offset; - }; - - struct entry_point_command { - uint32_t cmd; - uint32_t cmdsize; - uint64_t entryoff; - uint64_t stacksize; - }; - - // Structs from - struct fat_header { - uint32_t magic; - uint32_t nfat_arch; - }; - - struct fat_arch { - uint32_t cputype; - uint32_t cpusubtype; - uint32_t offset; - uint32_t size; - uint32_t align; - }; - - struct fat_arch_64 { - uint32_t cputype; - uint32_t cpusubtype; - uint64_t offset; - uint64_t size; - uint32_t align; - uint32_t reserved; - }; - - // Structs from - struct relocation_info { - int32_t r_address; - uint32_t r_symbolnum:24, - r_pcrel:1, - r_length:2, - r_extern:1, - r_type:4; - }; - - struct scattered_relocation_info { -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) - uint32_t r_scattered:1, - r_pcrel:1, - r_length:2, - r_type:4, - r_address:24; -#else - uint32_t r_address:24, - r_type:4, - r_length:2, - r_pcrel:1, - r_scattered:1; -#endif - int32_t r_value; - }; - - // Structs NOT from , but that make LLVM's life easier - struct any_relocation_info { - uint32_t r_word0, r_word1; - }; - - // Structs from - struct nlist_base { - uint32_t n_strx; - uint8_t n_type; - uint8_t n_sect; - uint16_t n_desc; - }; - - struct nlist { - uint32_t n_strx; - uint8_t n_type; - uint8_t n_sect; - int16_t n_desc; - uint32_t n_value; - }; - - struct nlist_64 { - uint32_t n_strx; - uint8_t n_type; - uint8_t n_sect; - uint16_t n_desc; - uint64_t n_value; - }; - - // Byte order swapping functions for MachO structs - - inline void swapStruct(fat_header &mh) { - sys::swapByteOrder(mh.magic); - sys::swapByteOrder(mh.nfat_arch); - } - - inline void swapStruct(fat_arch &mh) { - sys::swapByteOrder(mh.cputype); - sys::swapByteOrder(mh.cpusubtype); - sys::swapByteOrder(mh.offset); - sys::swapByteOrder(mh.size); - sys::swapByteOrder(mh.align); - } - - inline void swapStruct(fat_arch_64 &mh) { - sys::swapByteOrder(mh.cputype); - sys::swapByteOrder(mh.cpusubtype); - sys::swapByteOrder(mh.offset); - sys::swapByteOrder(mh.size); - sys::swapByteOrder(mh.align); - sys::swapByteOrder(mh.reserved); - } - - inline void swapStruct(mach_header &mh) { - sys::swapByteOrder(mh.magic); - sys::swapByteOrder(mh.cputype); - sys::swapByteOrder(mh.cpusubtype); - sys::swapByteOrder(mh.filetype); - sys::swapByteOrder(mh.ncmds); - sys::swapByteOrder(mh.sizeofcmds); - sys::swapByteOrder(mh.flags); - } - - inline void swapStruct(mach_header_64 &H) { - sys::swapByteOrder(H.magic); - sys::swapByteOrder(H.cputype); - sys::swapByteOrder(H.cpusubtype); - sys::swapByteOrder(H.filetype); - sys::swapByteOrder(H.ncmds); - sys::swapByteOrder(H.sizeofcmds); - sys::swapByteOrder(H.flags); - sys::swapByteOrder(H.reserved); - } - - inline void swapStruct(load_command &lc) { - sys::swapByteOrder(lc.cmd); - sys::swapByteOrder(lc.cmdsize); - } - - inline void swapStruct(symtab_command &lc) { - sys::swapByteOrder(lc.cmd); - sys::swapByteOrder(lc.cmdsize); - sys::swapByteOrder(lc.symoff); - sys::swapByteOrder(lc.nsyms); - sys::swapByteOrder(lc.stroff); - sys::swapByteOrder(lc.strsize); - } - - inline void swapStruct(segment_command_64 &seg) { - sys::swapByteOrder(seg.cmd); - sys::swapByteOrder(seg.cmdsize); - sys::swapByteOrder(seg.vmaddr); - sys::swapByteOrder(seg.vmsize); - sys::swapByteOrder(seg.fileoff); - sys::swapByteOrder(seg.filesize); - sys::swapByteOrder(seg.maxprot); - sys::swapByteOrder(seg.initprot); - sys::swapByteOrder(seg.nsects); - sys::swapByteOrder(seg.flags); - } - - inline void swapStruct(segment_command &seg) { - sys::swapByteOrder(seg.cmd); - sys::swapByteOrder(seg.cmdsize); - sys::swapByteOrder(seg.vmaddr); - sys::swapByteOrder(seg.vmsize); - sys::swapByteOrder(seg.fileoff); - sys::swapByteOrder(seg.filesize); - sys::swapByteOrder(seg.maxprot); - sys::swapByteOrder(seg.initprot); - sys::swapByteOrder(seg.nsects); - sys::swapByteOrder(seg.flags); - } - - inline void swapStruct(section_64 §) { - sys::swapByteOrder(sect.addr); - sys::swapByteOrder(sect.size); - sys::swapByteOrder(sect.offset); - sys::swapByteOrder(sect.align); - sys::swapByteOrder(sect.reloff); - sys::swapByteOrder(sect.nreloc); - sys::swapByteOrder(sect.flags); - sys::swapByteOrder(sect.reserved1); - sys::swapByteOrder(sect.reserved2); - } - - inline void swapStruct(section §) { - sys::swapByteOrder(sect.addr); - sys::swapByteOrder(sect.size); - sys::swapByteOrder(sect.offset); - sys::swapByteOrder(sect.align); - sys::swapByteOrder(sect.reloff); - sys::swapByteOrder(sect.nreloc); - sys::swapByteOrder(sect.flags); - sys::swapByteOrder(sect.reserved1); - sys::swapByteOrder(sect.reserved2); - } - - inline void swapStruct(dyld_info_command &info) { - sys::swapByteOrder(info.cmd); - sys::swapByteOrder(info.cmdsize); - sys::swapByteOrder(info.rebase_off); - sys::swapByteOrder(info.rebase_size); - sys::swapByteOrder(info.bind_off); - sys::swapByteOrder(info.bind_size); - sys::swapByteOrder(info.weak_bind_off); - sys::swapByteOrder(info.weak_bind_size); - sys::swapByteOrder(info.lazy_bind_off); - sys::swapByteOrder(info.lazy_bind_size); - sys::swapByteOrder(info.export_off); - sys::swapByteOrder(info.export_size); - } - - inline void swapStruct(dylib_command &d) { - sys::swapByteOrder(d.cmd); - sys::swapByteOrder(d.cmdsize); - sys::swapByteOrder(d.dylib.name); - sys::swapByteOrder(d.dylib.timestamp); - sys::swapByteOrder(d.dylib.current_version); - sys::swapByteOrder(d.dylib.compatibility_version); - } - - inline void swapStruct(sub_framework_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.umbrella); - } - - inline void swapStruct(sub_umbrella_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.sub_umbrella); - } - - inline void swapStruct(sub_library_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.sub_library); - } - - inline void swapStruct(sub_client_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.client); - } - - inline void swapStruct(routines_command &r) { - sys::swapByteOrder(r.cmd); - sys::swapByteOrder(r.cmdsize); - sys::swapByteOrder(r.init_address); - sys::swapByteOrder(r.init_module); - sys::swapByteOrder(r.reserved1); - sys::swapByteOrder(r.reserved2); - sys::swapByteOrder(r.reserved3); - sys::swapByteOrder(r.reserved4); - sys::swapByteOrder(r.reserved5); - sys::swapByteOrder(r.reserved6); - } - - inline void swapStruct(routines_command_64 &r) { - sys::swapByteOrder(r.cmd); - sys::swapByteOrder(r.cmdsize); - sys::swapByteOrder(r.init_address); - sys::swapByteOrder(r.init_module); - sys::swapByteOrder(r.reserved1); - sys::swapByteOrder(r.reserved2); - sys::swapByteOrder(r.reserved3); - sys::swapByteOrder(r.reserved4); - sys::swapByteOrder(r.reserved5); - sys::swapByteOrder(r.reserved6); - } - - inline void swapStruct(thread_command &t) { - sys::swapByteOrder(t.cmd); - sys::swapByteOrder(t.cmdsize); - } - - inline void swapStruct(dylinker_command &d) { - sys::swapByteOrder(d.cmd); - sys::swapByteOrder(d.cmdsize); - sys::swapByteOrder(d.name); - } - - inline void swapStruct(uuid_command &u) { - sys::swapByteOrder(u.cmd); - sys::swapByteOrder(u.cmdsize); - } - - inline void swapStruct(rpath_command &r) { - sys::swapByteOrder(r.cmd); - sys::swapByteOrder(r.cmdsize); - sys::swapByteOrder(r.path); - } - - inline void swapStruct(source_version_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.version); - } - - inline void swapStruct(entry_point_command &e) { - sys::swapByteOrder(e.cmd); - sys::swapByteOrder(e.cmdsize); - sys::swapByteOrder(e.entryoff); - sys::swapByteOrder(e.stacksize); - } - - inline void swapStruct(encryption_info_command &e) { - sys::swapByteOrder(e.cmd); - sys::swapByteOrder(e.cmdsize); - sys::swapByteOrder(e.cryptoff); - sys::swapByteOrder(e.cryptsize); - sys::swapByteOrder(e.cryptid); - } - - inline void swapStruct(encryption_info_command_64 &e) { - sys::swapByteOrder(e.cmd); - sys::swapByteOrder(e.cmdsize); - sys::swapByteOrder(e.cryptoff); - sys::swapByteOrder(e.cryptsize); - sys::swapByteOrder(e.cryptid); - sys::swapByteOrder(e.pad); - } - - inline void swapStruct(dysymtab_command &dst) { - sys::swapByteOrder(dst.cmd); - sys::swapByteOrder(dst.cmdsize); - sys::swapByteOrder(dst.ilocalsym); - sys::swapByteOrder(dst.nlocalsym); - sys::swapByteOrder(dst.iextdefsym); - sys::swapByteOrder(dst.nextdefsym); - sys::swapByteOrder(dst.iundefsym); - sys::swapByteOrder(dst.nundefsym); - sys::swapByteOrder(dst.tocoff); - sys::swapByteOrder(dst.ntoc); - sys::swapByteOrder(dst.modtaboff); - sys::swapByteOrder(dst.nmodtab); - sys::swapByteOrder(dst.extrefsymoff); - sys::swapByteOrder(dst.nextrefsyms); - sys::swapByteOrder(dst.indirectsymoff); - sys::swapByteOrder(dst.nindirectsyms); - sys::swapByteOrder(dst.extreloff); - sys::swapByteOrder(dst.nextrel); - sys::swapByteOrder(dst.locreloff); - sys::swapByteOrder(dst.nlocrel); - } - - inline void swapStruct(any_relocation_info &reloc) { - sys::swapByteOrder(reloc.r_word0); - sys::swapByteOrder(reloc.r_word1); - } - - inline void swapStruct(nlist_base &S) { - sys::swapByteOrder(S.n_strx); - sys::swapByteOrder(S.n_desc); - } - - inline void swapStruct(nlist &sym) { - sys::swapByteOrder(sym.n_strx); - sys::swapByteOrder(sym.n_desc); - sys::swapByteOrder(sym.n_value); - } - - inline void swapStruct(nlist_64 &sym) { - sys::swapByteOrder(sym.n_strx); - sys::swapByteOrder(sym.n_desc); - sys::swapByteOrder(sym.n_value); - } - - inline void swapStruct(linkedit_data_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.dataoff); - sys::swapByteOrder(C.datasize); - } - - inline void swapStruct(linker_option_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.count); - } - - inline void swapStruct(version_min_command&C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.version); - sys::swapByteOrder(C.sdk); - } - - inline void swapStruct(note_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.size); - } - - inline void swapStruct(build_version_command&C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.platform); - sys::swapByteOrder(C.minos); - sys::swapByteOrder(C.sdk); - sys::swapByteOrder(C.ntools); - } - - inline void swapStruct(build_tool_version&C) { - sys::swapByteOrder(C.tool); - sys::swapByteOrder(C.version); - } - - inline void swapStruct(data_in_code_entry &C) { - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.length); - sys::swapByteOrder(C.kind); - } - - inline void swapStruct(uint32_t &C) { - sys::swapByteOrder(C); - } - - // The prebind_cksum_command is obsolete and no longer supported. - inline void swapStruct(prebind_cksum_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.cksum); - } - - // The twolevel_hints_command is obsolete and no longer supported. - inline void swapStruct(twolevel_hints_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.nhints); - } - - // The prebound_dylib_command is obsolete and no longer supported. - inline void swapStruct(prebound_dylib_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.name); - sys::swapByteOrder(C.nmodules); - sys::swapByteOrder(C.linked_modules); - } - - // The fvmfile_command is obsolete and no longer supported. - inline void swapStruct(fvmfile_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.name); - sys::swapByteOrder(C.header_addr); - } - - // The symseg_command is obsolete and no longer supported. - inline void swapStruct(symseg_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.size); - } - - // The ident_command is obsolete and no longer supported. - inline void swapStruct(ident_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - } - - inline void swapStruct(fvmlib &C) { - sys::swapByteOrder(C.name); - sys::swapByteOrder(C.minor_version); - sys::swapByteOrder(C.header_addr); - } - - // The fvmlib_command is obsolete and no longer supported. - inline void swapStruct(fvmlib_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - swapStruct(C.fvmlib); - } - - // Get/Set functions from - - static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) { - return (((n_desc) >> 8u) & 0xffu); - } - - static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) { - n_desc = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8)); - } - - static inline uint8_t GET_COMM_ALIGN (uint16_t n_desc) { - return (n_desc >> 8u) & 0x0fu; - } - - static inline void SET_COMM_ALIGN (uint16_t &n_desc, uint8_t align) { - n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u)); - } - - // Enums from - enum : uint32_t { - // Capability bits used in the definition of cpu_type. - CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits - CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI - }; - - // Constants for the cputype field. - enum CPUType { - CPU_TYPE_ANY = -1, - CPU_TYPE_X86 = 7, - CPU_TYPE_I386 = CPU_TYPE_X86, - CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64, - /* CPU_TYPE_MIPS = 8, */ - CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC - CPU_TYPE_ARM = 12, - CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64, - CPU_TYPE_SPARC = 14, - CPU_TYPE_POWERPC = 18, - CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64 - }; - - enum : uint32_t { - // Capability bits used in the definition of cpusubtype. - CPU_SUBTYPE_MASK = 0xff000000, // Mask for architecture bits - CPU_SUBTYPE_LIB64 = 0x80000000, // 64 bit libraries - - // Special CPU subtype constants. - CPU_SUBTYPE_MULTIPLE = ~0u - }; - - // Constants for the cpusubtype field. - enum CPUSubTypeX86 { - CPU_SUBTYPE_I386_ALL = 3, - CPU_SUBTYPE_386 = 3, - CPU_SUBTYPE_486 = 4, - CPU_SUBTYPE_486SX = 0x84, - CPU_SUBTYPE_586 = 5, - CPU_SUBTYPE_PENT = CPU_SUBTYPE_586, - CPU_SUBTYPE_PENTPRO = 0x16, - CPU_SUBTYPE_PENTII_M3 = 0x36, - CPU_SUBTYPE_PENTII_M5 = 0x56, - CPU_SUBTYPE_CELERON = 0x67, - CPU_SUBTYPE_CELERON_MOBILE = 0x77, - CPU_SUBTYPE_PENTIUM_3 = 0x08, - CPU_SUBTYPE_PENTIUM_3_M = 0x18, - CPU_SUBTYPE_PENTIUM_3_XEON = 0x28, - CPU_SUBTYPE_PENTIUM_M = 0x09, - CPU_SUBTYPE_PENTIUM_4 = 0x0a, - CPU_SUBTYPE_PENTIUM_4_M = 0x1a, - CPU_SUBTYPE_ITANIUM = 0x0b, - CPU_SUBTYPE_ITANIUM_2 = 0x1b, - CPU_SUBTYPE_XEON = 0x0c, - CPU_SUBTYPE_XEON_MP = 0x1c, - - CPU_SUBTYPE_X86_ALL = 3, - CPU_SUBTYPE_X86_64_ALL = 3, - CPU_SUBTYPE_X86_ARCH1 = 4, - CPU_SUBTYPE_X86_64_H = 8 - }; - static inline int CPU_SUBTYPE_INTEL(int Family, int Model) { - return Family | (Model << 4); - } - static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) { - return ((int)ST) & 0x0f; - } - static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) { - return ((int)ST) >> 4; - } - enum { - CPU_SUBTYPE_INTEL_FAMILY_MAX = 15, - CPU_SUBTYPE_INTEL_MODEL_ALL = 0 - }; - - enum CPUSubTypeARM { - CPU_SUBTYPE_ARM_ALL = 0, - CPU_SUBTYPE_ARM_V4T = 5, - CPU_SUBTYPE_ARM_V6 = 6, - CPU_SUBTYPE_ARM_V5 = 7, - CPU_SUBTYPE_ARM_V5TEJ = 7, - CPU_SUBTYPE_ARM_XSCALE = 8, - CPU_SUBTYPE_ARM_V7 = 9, - // unused ARM_V7F = 10, - CPU_SUBTYPE_ARM_V7S = 11, - CPU_SUBTYPE_ARM_V7K = 12, - CPU_SUBTYPE_ARM_V6M = 14, - CPU_SUBTYPE_ARM_V7M = 15, - CPU_SUBTYPE_ARM_V7EM = 16 - }; - - enum CPUSubTypeARM64 { - CPU_SUBTYPE_ARM64_ALL = 0 - }; - - enum CPUSubTypeSPARC { - CPU_SUBTYPE_SPARC_ALL = 0 - }; - - enum CPUSubTypePowerPC { - CPU_SUBTYPE_POWERPC_ALL = 0, - CPU_SUBTYPE_POWERPC_601 = 1, - CPU_SUBTYPE_POWERPC_602 = 2, - CPU_SUBTYPE_POWERPC_603 = 3, - CPU_SUBTYPE_POWERPC_603e = 4, - CPU_SUBTYPE_POWERPC_603ev = 5, - CPU_SUBTYPE_POWERPC_604 = 6, - CPU_SUBTYPE_POWERPC_604e = 7, - CPU_SUBTYPE_POWERPC_620 = 8, - CPU_SUBTYPE_POWERPC_750 = 9, - CPU_SUBTYPE_POWERPC_7400 = 10, - CPU_SUBTYPE_POWERPC_7450 = 11, - CPU_SUBTYPE_POWERPC_970 = 100, - - CPU_SUBTYPE_MC980000_ALL = CPU_SUBTYPE_POWERPC_ALL, - CPU_SUBTYPE_MC98601 = CPU_SUBTYPE_POWERPC_601 - }; - - struct x86_thread_state32_t { - uint32_t eax; - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - uint32_t edi; - uint32_t esi; - uint32_t ebp; - uint32_t esp; - uint32_t ss; - uint32_t eflags; - uint32_t eip; - uint32_t cs; - uint32_t ds; - uint32_t es; - uint32_t fs; - uint32_t gs; - }; - - struct x86_thread_state64_t { - uint64_t rax; - uint64_t rbx; - uint64_t rcx; - uint64_t rdx; - uint64_t rdi; - uint64_t rsi; - uint64_t rbp; - uint64_t rsp; - uint64_t r8; - uint64_t r9; - uint64_t r10; - uint64_t r11; - uint64_t r12; - uint64_t r13; - uint64_t r14; - uint64_t r15; - uint64_t rip; - uint64_t rflags; - uint64_t cs; - uint64_t fs; - uint64_t gs; - }; - - enum x86_fp_control_precis { - x86_FP_PREC_24B = 0, - x86_FP_PREC_53B = 2, - x86_FP_PREC_64B = 3 - }; - - enum x86_fp_control_rc { - x86_FP_RND_NEAR = 0, - x86_FP_RND_DOWN = 1, - x86_FP_RND_UP = 2, - x86_FP_CHOP = 3 - }; - - struct fp_control_t { - unsigned short - invalid :1, - denorm :1, - zdiv :1, - ovrfl :1, - undfl :1, - precis :1, - :2, - pc :2, - rc :2, - :1, - :3; - }; - - struct fp_status_t { - unsigned short - invalid :1, - denorm :1, - zdiv :1, - ovrfl :1, - undfl :1, - precis :1, - stkflt :1, - errsumm :1, - c0 :1, - c1 :1, - c2 :1, - tos :3, - c3 :1, - busy :1; - }; - - struct mmst_reg_t { - char mmst_reg[10]; - char mmst_rsrv[6]; - }; - - struct xmm_reg_t { - char xmm_reg[16]; - }; - - struct x86_float_state64_t { - int32_t fpu_reserved[2]; - fp_control_t fpu_fcw; - fp_status_t fpu_fsw; - uint8_t fpu_ftw; - uint8_t fpu_rsrv1; - uint16_t fpu_fop; - uint32_t fpu_ip; - uint16_t fpu_cs; - uint16_t fpu_rsrv2; - uint32_t fpu_dp; - uint16_t fpu_ds; - uint16_t fpu_rsrv3; - uint32_t fpu_mxcsr; - uint32_t fpu_mxcsrmask; - mmst_reg_t fpu_stmm0; - mmst_reg_t fpu_stmm1; - mmst_reg_t fpu_stmm2; - mmst_reg_t fpu_stmm3; - mmst_reg_t fpu_stmm4; - mmst_reg_t fpu_stmm5; - mmst_reg_t fpu_stmm6; - mmst_reg_t fpu_stmm7; - xmm_reg_t fpu_xmm0; - xmm_reg_t fpu_xmm1; - xmm_reg_t fpu_xmm2; - xmm_reg_t fpu_xmm3; - xmm_reg_t fpu_xmm4; - xmm_reg_t fpu_xmm5; - xmm_reg_t fpu_xmm6; - xmm_reg_t fpu_xmm7; - xmm_reg_t fpu_xmm8; - xmm_reg_t fpu_xmm9; - xmm_reg_t fpu_xmm10; - xmm_reg_t fpu_xmm11; - xmm_reg_t fpu_xmm12; - xmm_reg_t fpu_xmm13; - xmm_reg_t fpu_xmm14; - xmm_reg_t fpu_xmm15; - char fpu_rsrv4[6*16]; - uint32_t fpu_reserved1; - }; - - struct x86_exception_state64_t { - uint16_t trapno; - uint16_t cpu; - uint32_t err; - uint64_t faultvaddr; - }; - - inline void swapStruct(x86_thread_state32_t &x) { - sys::swapByteOrder(x.eax); - sys::swapByteOrder(x.ebx); - sys::swapByteOrder(x.ecx); - sys::swapByteOrder(x.edx); - sys::swapByteOrder(x.edi); - sys::swapByteOrder(x.esi); - sys::swapByteOrder(x.ebp); - sys::swapByteOrder(x.esp); - sys::swapByteOrder(x.ss); - sys::swapByteOrder(x.eflags); - sys::swapByteOrder(x.eip); - sys::swapByteOrder(x.cs); - sys::swapByteOrder(x.ds); - sys::swapByteOrder(x.es); - sys::swapByteOrder(x.fs); - sys::swapByteOrder(x.gs); - } - - inline void swapStruct(x86_thread_state64_t &x) { - sys::swapByteOrder(x.rax); - sys::swapByteOrder(x.rbx); - sys::swapByteOrder(x.rcx); - sys::swapByteOrder(x.rdx); - sys::swapByteOrder(x.rdi); - sys::swapByteOrder(x.rsi); - sys::swapByteOrder(x.rbp); - sys::swapByteOrder(x.rsp); - sys::swapByteOrder(x.r8); - sys::swapByteOrder(x.r9); - sys::swapByteOrder(x.r10); - sys::swapByteOrder(x.r11); - sys::swapByteOrder(x.r12); - sys::swapByteOrder(x.r13); - sys::swapByteOrder(x.r14); - sys::swapByteOrder(x.r15); - sys::swapByteOrder(x.rip); - sys::swapByteOrder(x.rflags); - sys::swapByteOrder(x.cs); - sys::swapByteOrder(x.fs); - sys::swapByteOrder(x.gs); - } - - inline void swapStruct(x86_float_state64_t &x) { - sys::swapByteOrder(x.fpu_reserved[0]); - sys::swapByteOrder(x.fpu_reserved[1]); - // TODO swap: fp_control_t fpu_fcw; - // TODO swap: fp_status_t fpu_fsw; - sys::swapByteOrder(x.fpu_fop); - sys::swapByteOrder(x.fpu_ip); - sys::swapByteOrder(x.fpu_cs); - sys::swapByteOrder(x.fpu_rsrv2); - sys::swapByteOrder(x.fpu_dp); - sys::swapByteOrder(x.fpu_ds); - sys::swapByteOrder(x.fpu_rsrv3); - sys::swapByteOrder(x.fpu_mxcsr); - sys::swapByteOrder(x.fpu_mxcsrmask); - sys::swapByteOrder(x.fpu_reserved1); - } - - inline void swapStruct(x86_exception_state64_t &x) { - sys::swapByteOrder(x.trapno); - sys::swapByteOrder(x.cpu); - sys::swapByteOrder(x.err); - sys::swapByteOrder(x.faultvaddr); - } - - struct x86_state_hdr_t { - uint32_t flavor; - uint32_t count; - }; - - struct x86_thread_state_t { - x86_state_hdr_t tsh; - union { - x86_thread_state64_t ts64; - x86_thread_state32_t ts32; - } uts; - }; - - struct x86_float_state_t { - x86_state_hdr_t fsh; - union { - x86_float_state64_t fs64; - } ufs; - }; - - struct x86_exception_state_t { - x86_state_hdr_t esh; - union { - x86_exception_state64_t es64; - } ues; - }; - - inline void swapStruct(x86_state_hdr_t &x) { - sys::swapByteOrder(x.flavor); - sys::swapByteOrder(x.count); - } - - enum X86ThreadFlavors { - x86_THREAD_STATE32 = 1, - x86_FLOAT_STATE32 = 2, - x86_EXCEPTION_STATE32 = 3, - x86_THREAD_STATE64 = 4, - x86_FLOAT_STATE64 = 5, - x86_EXCEPTION_STATE64 = 6, - x86_THREAD_STATE = 7, - x86_FLOAT_STATE = 8, - x86_EXCEPTION_STATE = 9, - x86_DEBUG_STATE32 = 10, - x86_DEBUG_STATE64 = 11, - x86_DEBUG_STATE = 12 - }; - - inline void swapStruct(x86_thread_state_t &x) { - swapStruct(x.tsh); - if (x.tsh.flavor == x86_THREAD_STATE64) - swapStruct(x.uts.ts64); - } - - inline void swapStruct(x86_float_state_t &x) { - swapStruct(x.fsh); - if (x.fsh.flavor == x86_FLOAT_STATE64) - swapStruct(x.ufs.fs64); - } - - inline void swapStruct(x86_exception_state_t &x) { - swapStruct(x.esh); - if (x.esh.flavor == x86_EXCEPTION_STATE64) - swapStruct(x.ues.es64); - } - - const uint32_t x86_THREAD_STATE32_COUNT = - sizeof(x86_thread_state32_t) / sizeof(uint32_t); - - const uint32_t x86_THREAD_STATE64_COUNT = - sizeof(x86_thread_state64_t) / sizeof(uint32_t); - const uint32_t x86_FLOAT_STATE64_COUNT = - sizeof(x86_float_state64_t) / sizeof(uint32_t); - const uint32_t x86_EXCEPTION_STATE64_COUNT = - sizeof(x86_exception_state64_t) / sizeof(uint32_t); - - const uint32_t x86_THREAD_STATE_COUNT = - sizeof(x86_thread_state_t) / sizeof(uint32_t); - const uint32_t x86_FLOAT_STATE_COUNT = - sizeof(x86_float_state_t) / sizeof(uint32_t); - const uint32_t x86_EXCEPTION_STATE_COUNT = - sizeof(x86_exception_state_t) / sizeof(uint32_t); - - struct arm_thread_state32_t { - uint32_t r[13]; - uint32_t sp; - uint32_t lr; - uint32_t pc; - uint32_t cpsr; - }; - - inline void swapStruct(arm_thread_state32_t &x) { - for (int i = 0; i < 13; i++) - sys::swapByteOrder(x.r[i]); - sys::swapByteOrder(x.sp); - sys::swapByteOrder(x.lr); - sys::swapByteOrder(x.pc); - sys::swapByteOrder(x.cpsr); - } - - struct arm_thread_state64_t { - uint64_t x[29]; - uint64_t fp; - uint64_t lr; - uint64_t sp; - uint64_t pc; - uint32_t cpsr; - uint32_t pad; - }; - - inline void swapStruct(arm_thread_state64_t &x) { - for (int i = 0; i < 29; i++) - sys::swapByteOrder(x.x[i]); - sys::swapByteOrder(x.fp); - sys::swapByteOrder(x.lr); - sys::swapByteOrder(x.sp); - sys::swapByteOrder(x.pc); - sys::swapByteOrder(x.cpsr); - } - - struct arm_state_hdr_t { - uint32_t flavor; - uint32_t count; - }; - - struct arm_thread_state_t { - arm_state_hdr_t tsh; - union { - arm_thread_state32_t ts32; - } uts; - }; - - inline void swapStruct(arm_state_hdr_t &x) { - sys::swapByteOrder(x.flavor); - sys::swapByteOrder(x.count); - } - - enum ARMThreadFlavors { - ARM_THREAD_STATE = 1, - ARM_VFP_STATE = 2, - ARM_EXCEPTION_STATE = 3, - ARM_DEBUG_STATE = 4, - ARN_THREAD_STATE_NONE = 5, - ARM_THREAD_STATE64 = 6, - ARM_EXCEPTION_STATE64 = 7 - }; - - inline void swapStruct(arm_thread_state_t &x) { - swapStruct(x.tsh); - if (x.tsh.flavor == ARM_THREAD_STATE) - swapStruct(x.uts.ts32); - } - - const uint32_t ARM_THREAD_STATE_COUNT = - sizeof(arm_thread_state32_t) / sizeof(uint32_t); - - const uint32_t ARM_THREAD_STATE64_COUNT = - sizeof(arm_thread_state64_t) / sizeof(uint32_t); - - struct ppc_thread_state32_t { - uint32_t srr0; - uint32_t srr1; - uint32_t r0; - uint32_t r1; - uint32_t r2; - uint32_t r3; - uint32_t r4; - uint32_t r5; - uint32_t r6; - uint32_t r7; - uint32_t r8; - uint32_t r9; - uint32_t r10; - uint32_t r11; - uint32_t r12; - uint32_t r13; - uint32_t r14; - uint32_t r15; - uint32_t r16; - uint32_t r17; - uint32_t r18; - uint32_t r19; - uint32_t r20; - uint32_t r21; - uint32_t r22; - uint32_t r23; - uint32_t r24; - uint32_t r25; - uint32_t r26; - uint32_t r27; - uint32_t r28; - uint32_t r29; - uint32_t r30; - uint32_t r31; - uint32_t ct; - uint32_t xer; - uint32_t lr; - uint32_t ctr; - uint32_t mq; - uint32_t vrsave; - }; - - inline void swapStruct(ppc_thread_state32_t &x) { - sys::swapByteOrder(x.srr0); - sys::swapByteOrder(x.srr1); - sys::swapByteOrder(x.r0); - sys::swapByteOrder(x.r1); - sys::swapByteOrder(x.r2); - sys::swapByteOrder(x.r3); - sys::swapByteOrder(x.r4); - sys::swapByteOrder(x.r5); - sys::swapByteOrder(x.r6); - sys::swapByteOrder(x.r7); - sys::swapByteOrder(x.r8); - sys::swapByteOrder(x.r9); - sys::swapByteOrder(x.r10); - sys::swapByteOrder(x.r11); - sys::swapByteOrder(x.r12); - sys::swapByteOrder(x.r13); - sys::swapByteOrder(x.r14); - sys::swapByteOrder(x.r15); - sys::swapByteOrder(x.r16); - sys::swapByteOrder(x.r17); - sys::swapByteOrder(x.r18); - sys::swapByteOrder(x.r19); - sys::swapByteOrder(x.r20); - sys::swapByteOrder(x.r21); - sys::swapByteOrder(x.r22); - sys::swapByteOrder(x.r23); - sys::swapByteOrder(x.r24); - sys::swapByteOrder(x.r25); - sys::swapByteOrder(x.r26); - sys::swapByteOrder(x.r27); - sys::swapByteOrder(x.r28); - sys::swapByteOrder(x.r29); - sys::swapByteOrder(x.r30); - sys::swapByteOrder(x.r31); - sys::swapByteOrder(x.ct); - sys::swapByteOrder(x.xer); - sys::swapByteOrder(x.lr); - sys::swapByteOrder(x.ctr); - sys::swapByteOrder(x.mq); - sys::swapByteOrder(x.vrsave); - } - - struct ppc_state_hdr_t { - uint32_t flavor; - uint32_t count; - }; - - struct ppc_thread_state_t { - ppc_state_hdr_t tsh; - union { - ppc_thread_state32_t ts32; - } uts; - }; - - inline void swapStruct(ppc_state_hdr_t &x) { - sys::swapByteOrder(x.flavor); - sys::swapByteOrder(x.count); - } - - enum PPCThreadFlavors { - PPC_THREAD_STATE = 1, - PPC_FLOAT_STATE = 2, - PPC_EXCEPTION_STATE = 3, - PPC_VECTOR_STATE = 4, - PPC_THREAD_STATE64 = 5, - PPC_EXCEPTION_STATE64 = 6, - PPC_THREAD_STATE_NONE = 7 - }; - - inline void swapStruct(ppc_thread_state_t &x) { - swapStruct(x.tsh); - if (x.tsh.flavor == PPC_THREAD_STATE) - swapStruct(x.uts.ts32); - } - - const uint32_t PPC_THREAD_STATE_COUNT = - sizeof(ppc_thread_state32_t) / sizeof(uint32_t); - - // Define a union of all load command structs - #define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data; - - union macho_load_command { - #include "llvm/Support/MachO.def" - }; - - } // end namespace MachO -} // end namespace llvm - -#endif diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 7f07e8cc3a51..bb840380d4d3 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -20,8 +20,8 @@ #include #include #include -#include #include +#include #ifdef _MSC_VER #include diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h index e8bdc3e89fa7..73f0251a6b6e 100644 --- a/include/llvm/Support/MemoryBuffer.h +++ b/include/llvm/Support/MemoryBuffer.h @@ -14,14 +14,14 @@ #ifndef LLVM_SUPPORT_MEMORYBUFFER_H #define LLVM_SUPPORT_MEMORYBUFFER_H +#include "llvm-c/Types.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/ErrorOr.h" -#include "llvm-c/Types.h" -#include #include #include +#include namespace llvm { diff --git a/include/llvm/Support/Solaris.h b/include/llvm/Support/Solaris.h index b08228532489..88d83014c468 100644 --- a/include/llvm/Support/Solaris.h +++ b/include/llvm/Support/Solaris.h @@ -14,8 +14,8 @@ #ifndef LLVM_SUPPORT_SOLARIS_H #define LLVM_SUPPORT_SOLARIS_H -#include #include +#include /* Solaris doesn't have endian.h. SPARC is the only supported big-endian ISA. */ #define BIG_ENDIAN 4321 diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h index cb90d968c44c..399f8dcd76fc 100644 --- a/include/llvm/Support/SourceMgr.h +++ b/include/llvm/Support/SourceMgr.h @@ -49,7 +49,7 @@ public: /// Clients that want to handle their own diagnostics in a custom way can /// register a function pointer+context as a diagnostic handler. /// It gets called each time PrintMessage is invoked. - typedef void (*DiagHandlerTy)(const SMDiagnostic &, void *Context); + using DiagHandlerTy = void (*)(const SMDiagnostic &, void *Context); private: struct SrcBuffer { diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 2ec0c3b76c11..bb5fd07f0d00 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -1,4 +1,4 @@ -//===-- StringPool.h - Interned string pool ---------------------*- C++ -*-===// +//===- StringPool.h - Interned string pool ----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -30,6 +30,7 @@ #define LLVM_SUPPORT_STRINGPOOL_H #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { @@ -43,17 +44,17 @@ namespace llvm { /// PooledString - This is the value of an entry in the pool's interning /// table. struct PooledString { - StringPool *Pool; ///< So the string can remove itself. - unsigned Refcount; ///< Number of referencing PooledStringPtrs. + StringPool *Pool = nullptr; ///< So the string can remove itself. + unsigned Refcount = 0; ///< Number of referencing PooledStringPtrs. public: - PooledString() : Pool(nullptr), Refcount(0) { } + PooledString() = default; }; friend class PooledStringPtr; - typedef StringMap table_t; - typedef StringMapEntry entry_t; + using table_t = StringMap; + using entry_t = StringMapEntry; table_t InternTable; public: @@ -76,11 +77,12 @@ namespace llvm { /// a single pointer, but it does have reference-counting overhead when /// copied. class PooledStringPtr { - typedef StringPool::entry_t entry_t; - entry_t *S; + using entry_t = StringPool::entry_t; + + entry_t *S = nullptr; public: - PooledStringPtr() : S(nullptr) {} + PooledStringPtr() = default; explicit PooledStringPtr(entry_t *E) : S(E) { if (S) ++S->getValue().Refcount; @@ -133,6 +135,6 @@ namespace llvm { inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_STRINGPOOL_H diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index bd68d2414487..9e9a91b0abda 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -20,10 +20,10 @@ #define LLVM_SUPPORT_TARGETREGISTRY_H #include "llvm-c/Disassembler.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -598,7 +598,7 @@ struct TargetRegistry { /// printRegisteredTargetsForVersion - Print the registered targets /// appropriately for inclusion in a tool's version output. - static void printRegisteredTargetsForVersion(); + static void printRegisteredTargetsForVersion(raw_ostream &OS); /// @name Registry Access /// @{ diff --git a/include/llvm/Support/raw_sha1_ostream.h b/include/llvm/Support/raw_sha1_ostream.h index 329ef9fd069b..bd55d98b7c1d 100644 --- a/include/llvm/Support/raw_sha1_ostream.h +++ b/include/llvm/Support/raw_sha1_ostream.h @@ -14,9 +14,9 @@ #ifndef LLVM_SUPPORT_RAW_SHA1_OSTREAM_H #define LLVM_SUPPORT_RAW_SHA1_OSTREAM_H -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/SHA1.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index ce4bbf8cb2cc..cc0878358800 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -14,11 +14,10 @@ #ifndef LLVM_SUPPORT_TYPE_TRAITS_H #define LLVM_SUPPORT_TYPE_TRAITS_H +#include "llvm/Support/Compiler.h" #include #include -#include "llvm/Support/Compiler.h" - #ifndef __has_feature #define LLVM_DEFINED_HAS_FEATURE #define __has_feature(x) 0 @@ -51,7 +50,7 @@ struct isPodLike { // std::pair's are pod-like if their elements are. template -struct isPodLike > { +struct isPodLike> { static const bool value = isPodLike::value && isPodLike::value; }; @@ -63,7 +62,7 @@ struct isPodLike > { /// Also note that enum classes aren't implicitly convertible to integral types, /// the value may therefore need to be explicitly converted before being used. template class is_integral_or_enum { - typedef typename std::remove_reference::type UnderlyingT; + using UnderlyingT = typename std::remove_reference::type; public: static const bool value = @@ -76,23 +75,23 @@ public: /// \brief If T is a pointer, just return it. If it is not, return T&. template -struct add_lvalue_reference_if_not_pointer { typedef T &type; }; +struct add_lvalue_reference_if_not_pointer { using type = T &; }; template struct add_lvalue_reference_if_not_pointer< T, typename std::enable_if::value>::type> { - typedef T type; + using type = T; }; /// \brief If T is a pointer to X, return a pointer to const X. If it is not, /// return const T. template -struct add_const_past_pointer { typedef const T type; }; +struct add_const_past_pointer { using type = const T; }; template struct add_const_past_pointer< T, typename std::enable_if::value>::type> { - typedef const typename std::remove_pointer::type *type; + using type = const typename std::remove_pointer::type *; }; template @@ -104,7 +103,8 @@ struct const_pointer_or_const_ref< T, typename std::enable_if::value>::type> { using type = typename add_const_past_pointer::type; }; -} + +} // end namespace llvm // If the compiler supports detecting whether a class is final, define // an LLVM_IS_FINAL macro. If it cannot be defined properly, this @@ -119,4 +119,4 @@ struct const_pointer_or_const_ref< #undef __has_feature #endif -#endif +#endif // LLVM_SUPPORT_TYPE_TRAITS_H diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 97a6f0c6e3ae..7595d4339810 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -16,13 +16,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" namespace llvm { diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 7258a5cc2d89..a9d67228d205 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -677,6 +677,16 @@ public: unsigned &NumIntermediates, MVT &RegisterVT) const; + /// Certain targets such as MIPS require that some types such as vectors are + /// always broken down into scalars in some contexts. This occurs even if the + /// vector type is legal. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, + RegisterVT); + } + struct IntrinsicInfo { unsigned opc = 0; // target opcode EVT memVT; // memory VT @@ -1085,6 +1095,33 @@ public: llvm_unreachable("Unsupported extended type!"); } + /// Certain combinations of ABIs, Targets and features require that types + /// are legal for some operations and not for other operations. + /// For MIPS all vector types must be passed through the integer register set. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const { + return getRegisterType(VT); + } + + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + return getRegisterType(Context, VT); + } + + /// Certain targets require unusual breakdowns of certain types. For MIPS, + /// this occurs when a vector type is used, as vector are passed through the + /// integer register set. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + return getNumRegisters(Context, VT); + } + + /// Certain targets have context senstive alignment requirements, where one + /// type has the alignment requirement of another type. + virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return DL.getABITypeAlignment(ArgTy); + } + /// If true, then instruction selection should seek to shrink the FP constant /// of the specified type to a smaller type in order to save space and / or /// reduce runtime. @@ -1876,6 +1913,38 @@ public: return false; } + /// Returns true if the opcode is a commutative binary operation. + virtual bool isCommutativeBinOp(unsigned Opcode) const { + // FIXME: This should get its info from the td file. + switch (Opcode) { + case ISD::ADD: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + case ISD::MUL: + case ISD::MULHU: + case ISD::MULHS: + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: + case ISD::FADD: + case ISD::FMUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SADDO: + case ISD::UADDO: + case ISD::ADDC: + case ISD::ADDE: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + return true; + default: return false; + } + } + /// Return true if it's free to truncate a value of type FromTy to type /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 /// by referencing its sub-register AX. diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index ed390799cfc3..933c6c87b0be 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -25,7 +25,6 @@ namespace llvm { class GlobalValue; -class MachineFunctionInitializer; class Mangler; class MCAsmInfo; class MCContext; @@ -227,8 +226,7 @@ public: PassManagerBase &, raw_pwrite_stream &, CodeGenFileType, bool /*DisableVerify*/ = true, AnalysisID /*StartBefore*/ = nullptr, AnalysisID /*StartAfter*/ = nullptr, AnalysisID /*StopBefore*/ = nullptr, - AnalysisID /*StopAfter*/ = nullptr, - MachineFunctionInitializer * /*MFInitializer*/ = nullptr) { + AnalysisID /*StopAfter*/ = nullptr) { return true; } @@ -289,8 +287,7 @@ public: PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify = true, AnalysisID StartBefore = nullptr, AnalysisID StartAfter = nullptr, AnalysisID StopBefore = nullptr, - AnalysisID StopAfter = nullptr, - MachineFunctionInitializer *MFInitializer = nullptr) override; + AnalysisID StopAfter = nullptr) override; /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It @@ -305,6 +302,11 @@ public: /// remove this at some point and always enable the verifier when /// EXPENSIVE_CHECKS is enabled. virtual bool isMachineVerifierClean() const { return true; } + + /// \brief Adds an AsmPrinter pass to the pipeline that prints assembly or + /// machine code from the MI representation. + bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out, + CodeGenFileType FileTYpe, MCContext &Context); }; } // end namespace llvm diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 7cc33f2fdccb..5c2063880f8b 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -105,10 +105,10 @@ namespace llvm { HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), UseInitArray(false), - DisableIntegratedAS(false), CompressDebugSections(false), - RelaxELFRelocations(false), FunctionSections(false), - DataSections(false), UniqueSectionNames(true), TrapUnreachable(false), - EmulatedTLS(false), EnableIPRA(false) {} + DisableIntegratedAS(false), RelaxELFRelocations(false), + FunctionSections(false), DataSections(false), + UniqueSectionNames(true), TrapUnreachable(false), EmulatedTLS(false), + EnableIPRA(false) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging @@ -194,7 +194,7 @@ namespace llvm { unsigned DisableIntegratedAS : 1; /// Compress DWARF debug sections. - unsigned CompressDebugSections : 1; + DebugCompressionType CompressDebugSections = DebugCompressionType::None; unsigned RelaxELFRelocations : 1; diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index 83950a9cd027..9cb07a5c6dae 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -18,8 +18,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/PBQPRAConstraint.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" diff --git a/include/llvm/Transforms/IPO/FunctionAttrs.h b/include/llvm/Transforms/IPO/FunctionAttrs.h index 85d6364c8bbc..36dd06b85b41 100644 --- a/include/llvm/Transforms/IPO/FunctionAttrs.h +++ b/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -14,8 +14,8 @@ #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H #define LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H -#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h index 324ebca46de2..008341304995 100644 --- a/include/llvm/Transforms/Scalar/GVNExpression.h +++ b/include/llvm/Transforms/Scalar/GVNExpression.h @@ -93,6 +93,11 @@ public: } virtual bool equals(const Expression &Other) const { return true; } + // Return true if the two expressions are exactly the same, including the + // normally ignored fields. + virtual bool exactlyEquals(const Expression &Other) const { + return getExpressionType() == Other.getExpressionType() && equals(Other); + } unsigned getOpcode() const { return Opcode; } void setOpcode(unsigned opcode) { Opcode = opcode; } @@ -345,6 +350,10 @@ public: void setAlignment(unsigned Align) { Alignment = Align; } bool equals(const Expression &Other) const override; + bool exactlyEquals(const Expression &Other) const override { + return Expression::exactlyEquals(Other) && + cast(Other).getLoadInst() == getLoadInst(); + } // // Debugging support @@ -382,6 +391,10 @@ public: Value *getStoredValue() const { return StoredValue; } bool equals(const Expression &Other) const override; + bool exactlyEquals(const Expression &Other) const override { + return Expression::exactlyEquals(Other) && + cast(Other).getStoreInst() == getStoreInst(); + } // Debugging support // diff --git a/include/llvm/Transforms/Utils/EscapeEnumerator.h b/include/llvm/Transforms/Utils/EscapeEnumerator.h index 80d16ed4cf5b..1256dfdaca17 100644 --- a/include/llvm/Transforms/Utils/EscapeEnumerator.h +++ b/include/llvm/Transforms/Utils/EscapeEnumerator.h @@ -15,8 +15,8 @@ #ifndef LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H #define LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" namespace llvm { diff --git a/include/llvm/Transforms/Utils/FunctionComparator.h b/include/llvm/Transforms/Utils/FunctionComparator.h index ee58d1d138f7..b0f10eafaa95 100644 --- a/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/include/llvm/Transforms/Utils/FunctionComparator.h @@ -19,8 +19,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" -#include "llvm/IR/ValueMap.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include diff --git a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h index bb7fa523cb19..b7a3d130aa11 100644 --- a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h +++ b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h @@ -14,8 +14,8 @@ #define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include #include diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 8942111307ff..8fed292e77a3 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -15,13 +15,13 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" -#include "llvm/ADT/SmallPtrSet.h" namespace llvm { diff --git a/include/llvm/Transforms/Utils/LoopVersioning.h b/include/llvm/Transforms/Utils/LoopVersioning.h index 0d345a972e10..fa5d7845d080 100644 --- a/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/include/llvm/Transforms/Utils/LoopVersioning.h @@ -18,8 +18,8 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" namespace llvm { diff --git a/include/llvm/Transforms/Utils/OrderedInstructions.h b/include/llvm/Transforms/Utils/OrderedInstructions.h new file mode 100644 index 000000000000..e043ff39a998 --- /dev/null +++ b/include/llvm/Transforms/Utils/OrderedInstructions.h @@ -0,0 +1,51 @@ +//===- llvm/Transforms/Utils/OrderedInstructions.h -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an efficient way to check for dominance relation between 2 +// instructions. +// +// This interface dispatches to appropriate dominance check given 2 +// instructions, i.e. in case the instructions are in the same basic block, +// OrderedBasicBlock (with instruction numbering and caching) are used. +// Otherwise, dominator tree is used. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H +#define LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Operator.h" + +namespace llvm { + +class OrderedInstructions { + /// Used to check dominance for instructions in same basic block. + mutable DenseMap> + OBBMap; + + /// The dominator tree of the parent function. + DominatorTree *DT; + +public: + /// Constructor. + OrderedInstructions(DominatorTree *DT) : DT(DT) {} + + /// Return true if first instruction dominates the second. + bool dominates(const Instruction *, const Instruction *) const; + + /// Invalidate the OrderedBasicBlock cache when its basic block changes. + void invalidateBlock(BasicBlock *BB) { OBBMap.erase(BB); } +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index e44dc437342d..0cc6b34d4593 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -16,8 +16,8 @@ #define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/IR/ValueMap.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" namespace llvm { diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index c514db41623c..6f258191e89e 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -84,7 +84,7 @@ private: ArrayRef BuildVector = None, bool AllowReorder = false); - /// \brief Try to vectorize a chain that may start at the operands of \V; + /// \brief Try to vectorize a chain that may start at the operands of \p V. bool tryToVectorize(BinaryOperator *V, slpvectorizer::BoUpSLP &R); /// \brief Vectorize the store instructions collected in Stores. diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index e0780885d159..d906b05f7aaa 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -38,6 +38,31 @@ module LLVM_Backend { } module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } } + +module LLVM_BinaryFormat { + requires cplusplus + umbrella "BinaryFormat" module * { export * } + textual header "BinaryFormat/Dwarf.def" + textual header "BinaryFormat/MachO.def" + textual header "BinaryFormat/ELFRelocs/AArch64.def" + textual header "BinaryFormat/ELFRelocs/AMDGPU.def" + textual header "BinaryFormat/ELFRelocs/ARM.def" + textual header "BinaryFormat/ELFRelocs/AVR.def" + textual header "BinaryFormat/ELFRelocs/BPF.def" + textual header "BinaryFormat/ELFRelocs/Hexagon.def" + textual header "BinaryFormat/ELFRelocs/i386.def" + textual header "BinaryFormat/ELFRelocs/Lanai.def" + textual header "BinaryFormat/ELFRelocs/Mips.def" + textual header "BinaryFormat/ELFRelocs/PowerPC64.def" + textual header "BinaryFormat/ELFRelocs/PowerPC.def" + textual header "BinaryFormat/ELFRelocs/RISCV.def" + textual header "BinaryFormat/ELFRelocs/Sparc.def" + textual header "BinaryFormat/ELFRelocs/SystemZ.def" + textual header "BinaryFormat/ELFRelocs/x86_64.def" + textual header "BinaryFormat/ELFRelocs/WebAssembly.def" + textual header "BinaryFormat/WasmRelocs/WebAssembly.def" +} + module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } } module LLVM_DebugInfo { @@ -259,25 +284,6 @@ module LLVM_Utils { // These are intended for textual inclusion. textual header "Support/ARMTargetParser.def" textual header "Support/AArch64TargetParser.def" - textual header "Support/Dwarf.def" - textual header "Support/MachO.def" - textual header "Support/ELFRelocs/AArch64.def" - textual header "Support/ELFRelocs/AMDGPU.def" - textual header "Support/ELFRelocs/ARM.def" - textual header "Support/ELFRelocs/AVR.def" - textual header "Support/ELFRelocs/BPF.def" - textual header "Support/ELFRelocs/Hexagon.def" - textual header "Support/ELFRelocs/i386.def" - textual header "Support/ELFRelocs/Lanai.def" - textual header "Support/ELFRelocs/Mips.def" - textual header "Support/ELFRelocs/PowerPC64.def" - textual header "Support/ELFRelocs/PowerPC.def" - textual header "Support/ELFRelocs/RISCV.def" - textual header "Support/ELFRelocs/Sparc.def" - textual header "Support/ELFRelocs/SystemZ.def" - textual header "Support/ELFRelocs/x86_64.def" - textual header "Support/ELFRelocs/WebAssembly.def" - textual header "Support/WasmRelocs/WebAssembly.def" } // This part of the module is usable from both C and C++ code. diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 4d6a6c9a30aa..435c782d97a5 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -14,9 +14,9 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 16b711a69ec3..ee17ad3ba586 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -17,8 +17,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 267e19adfe4d..23d5a887c34a 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -30,6 +31,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) @@ -457,7 +459,8 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, return true; } -bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { +bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, + const TargetLibraryInfo *TLI) { const BranchInst *BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -480,8 +483,37 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { if (AndRHS->getUniqueInteger().isPowerOf2()) return false; + // Check if the LHS is the return value of a library function + LibFunc Func = NumLibFuncs; + if (TLI) + if (CallInst *Call = dyn_cast(CI->getOperand(0))) + if (Function *CalledFn = Call->getCalledFunction()) + TLI->getLibFunc(*CalledFn, Func); + bool isProb; - if (CV->isZero()) { + if (Func == LibFunc_strcasecmp || + Func == LibFunc_strcmp || + Func == LibFunc_strncasecmp || + Func == LibFunc_strncmp || + Func == LibFunc_memcmp) { + // strcmp and similar functions return zero, negative, or positive, if the + // first string is equal, less, or greater than the second. We consider it + // likely that the strings are not equal, so a comparison with zero is + // probably false, but also a comparison with any other number is also + // probably false given that what exactly is returned for nonzero values is + // not specified. Any kind of comparison other than equality we know + // nothing about. + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + isProb = false; + break; + case CmpInst::ICMP_NE: + isProb = true; + break; + default: + return false; + } + } else if (CV->isZero()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == 0 -> Unlikely @@ -707,7 +739,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { } } -void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { +void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. @@ -733,7 +766,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { continue; if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(BB)) + if (calcZeroHeuristics(BB, TLI)) continue; if (calcFloatingPointHeuristics(BB)) continue; @@ -747,12 +780,14 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { void BranchProbabilityInfoWrapperPass::getAnalysisUsage( AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis().getLoopInfo(); - BPI.calculate(F, LI); + const TargetLibraryInfo &TLI = getAnalysis().getTLI(); + BPI.calculate(F, LI, &TLI); return false; } @@ -767,7 +802,7 @@ AnalysisKey BranchProbabilityAnalysis::Key; BranchProbabilityInfo BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BranchProbabilityInfo BPI; - BPI.calculate(F, AM.getResult(F)); + BPI.calculate(F, AM.getResult(F), &AM.getResult(F)); return BPI; } diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h index 54782b6bd4ad..95874b88244b 100644 --- a/lib/Analysis/CFLGraph.h +++ b/lib/Analysis/CFLGraph.h @@ -16,7 +16,6 @@ #define LLVM_ANALYSIS_CFLGRAPH_H #include "AliasAnalysisSummary.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Analysis/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp index af942e9ed3e9..e7017e77652a 100644 --- a/lib/Analysis/CallPrinter.cpp +++ b/lib/Analysis/CallPrinter.cpp @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" using namespace llvm; diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 9862c3c9c270..2093f0fdec12 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index bdffdd8eb270..e4d9292db92d 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index a906770dbb34..0f5ec3f5626e 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -22,8 +22,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" @@ -1015,9 +1015,11 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, case Instruction::ICmp: case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: - if (auto *F = dyn_cast(Ops.back())) - if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); + if (auto *F = dyn_cast(Ops.back())) { + ImmutableCallSite CS(cast(InstOrCE)); + if (canConstantFoldCallTo(CS, F)) + return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI); + } return nullptr; case Instruction::Select: return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); @@ -1356,7 +1358,9 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // Constant Folding for Calls // -bool llvm::canConstantFoldCallTo(const Function *F) { +bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { + if (CS.isNoBuiltin()) + return false; switch (F->getIntrinsicID()) { case Intrinsic::fabs: case Intrinsic::minnum: @@ -1584,6 +1588,9 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN if (IntrinsicID == Intrinsic::cos) return Constant::getNullValue(Ty); + if (IntrinsicID == Intrinsic::bswap || + IntrinsicID == Intrinsic::bitreverse) + return Operands[0]; } if (auto *Op = dyn_cast(Operands[0])) { if (IntrinsicID == Intrinsic::convert_to_fp16) { @@ -1815,7 +1822,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/false, Ty); - LLVM_FALLTHROUGH; + break; case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: @@ -1824,16 +1831,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty); + break; } } - if (isa(Operands[0])) { - if (IntrinsicID == Intrinsic::bswap || - IntrinsicID == Intrinsic::bitreverse) - return Operands[0]; - return nullptr; - } - return nullptr; } @@ -2034,6 +2035,14 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { // Gather a column of constants. for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { + // These intrinsics use a scalar type for their second argument. + if (J == 1 && + (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz || + IntrinsicID == Intrinsic::powi)) { + Lane[J] = Operands[J]; + continue; + } + Constant *Agg = Operands[J]->getAggregateElement(I); if (!Agg) return nullptr; @@ -2054,8 +2063,11 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, } // end anonymous namespace Constant * -llvm::ConstantFoldCall(Function *F, ArrayRef Operands, +llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, + ArrayRef Operands, const TargetLibraryInfo *TLI) { + if (CS.isNoBuiltin()) + return nullptr; if (!F->hasName()) return nullptr; StringRef Name = F->getName(); @@ -2072,6 +2084,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap // (and to some extent ConstantFoldScalarCall). + if (CS.isNoBuiltin()) + return false; Function *F = CS.getCalledFunction(); if (!F) return false; diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index 33f00cb19b26..4ef023379bb6 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -475,7 +475,9 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { const std::vector &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); - if (!SCC[0]->getFunction() || !SCC[0]->getFunction()->isDefinitionExact()) { + Function *F = SCC[0]->getFunction(); + + if (!F || !F->isDefinitionExact()) { // Calls externally or not exact - can't say anything useful. Remove any // existing function records (may have been created when scanning // globals). @@ -484,19 +486,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { continue; } - FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()]; + FunctionInfo &FI = FunctionInfos[F]; bool KnowNothing = false; // Collect the mod/ref properties due to called functions. We only compute // one mod-ref set. for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { - Function *F = SCC[i]->getFunction(); if (!F) { KnowNothing = true; break; } - if (F->isDeclaration()) { + if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) { // Try to get mod/ref behaviour from function attributes. if (F->doesNotAccessMemory()) { // Can't do better than that! @@ -545,6 +546,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { for (auto *Node : SCC) { if (FI.getModRefInfo() == MRI_ModRef) break; // The mod/ref lattice saturates here. + + // Don't prove any properties based on the implementation of an optnone + // function. Function attributes were already used as a best approximation + // above. + if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone)) + continue; + for (Instruction &I : instructions(Node->getFunction())) { if (FI.getModRefInfo() == MRI_ModRef) break; // The mod/ref lattice saturates here. diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 77c87928728a..6ff5938a3175 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -869,7 +869,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { // because we have to continually rebuild the argument list even when no // simplifications can be performed. Until that is fixed with remapping // inside of instsimplify, directly constant fold calls here. - if (!canConstantFoldCallTo(F)) + if (!canConstantFoldCallTo(CS, F)) return false; // Try to re-map the arguments to constants. @@ -885,7 +885,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { ConstantArgs.push_back(C); } - if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) { SimplifiedValues[CS.getInstruction()] = C; return true; } diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index de2b9c0c56db..27c6b580e7ac 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/Pass.h" @@ -33,7 +33,6 @@ STATISTIC(TotalMemInst, "Number of memory instructions"); #include "llvm/IR/Instruction.def" - namespace { class InstCount : public FunctionPass, public InstVisitor { friend class InstVisitor; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 66ac847455cd..a975be79619b 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2391,7 +2391,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { const APInt *C; switch (BO.getOpcode()) { case Instruction::Add: - if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // FIXME: If we have both nuw and nsw, we should reduce the range further. if (BO.hasNoUnsignedWrap()) { // 'add nuw x, C' produces [C, UINT_MAX]. @@ -2429,7 +2429,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { unsigned ShiftAmount = Width - 1; - if (*C != 0 && BO.isExact()) + if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); if (C->isNegative()) { // 'ashr C, x' produces [C, C >> (Width-1)] @@ -2450,7 +2450,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { } else if (match(BO.getOperand(0), m_APInt(C))) { // 'lshr C, x' produces [C >> (Width-1), C]. unsigned ShiftAmount = Width - 1; - if (*C != 0 && BO.isExact()) + if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); Lower = C->lshr(ShiftAmount); Upper = *C + 1; @@ -2512,7 +2512,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { break; case Instruction::UDiv: - if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // 'udiv x, C' produces [0, UINT_MAX / C]. Upper = APInt::getMaxValue(Width).udiv(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { @@ -2827,14 +2827,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // - CI2 is one // - CI isn't zero if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() || - *CI2Val == 1 || !CI->isZero()) { + CI2Val->isOneValue() || !CI->isZero()) { if (Pred == ICmpInst::ICMP_EQ) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_NE) return ConstantInt::getTrue(RHS->getContext()); } } - if (CIVal->isSignMask() && *CI2Val == 1) { + if (CIVal->isSignMask() && CI2Val->isOneValue()) { if (Pred == ICmpInst::ICMP_UGT) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_ULE) @@ -3308,11 +3308,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // icmp eq|ne X, Y -> false|true if X != Y - if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + if (ICmpInst::isEquality(Pred) && isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { - LLVMContext &Ctx = LHS->getType()->getContext(); - return Pred == ICmpInst::ICMP_NE ? - ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy); } if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse)) @@ -3360,19 +3358,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // If a bit is known to be zero for A and known to be one for B, - // then A and B cannot be equal. - if (ICmpInst::isEquality(Pred)) { - const APInt *RHSVal; - if (match(RHS, m_APInt(RHSVal))) { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); - if (LHSKnown.Zero.intersects(*RHSVal) || - !LHSKnown.One.isSubsetOf(*RHSVal)) - return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy) - : ConstantInt::getTrue(ITy); - } - } - // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) @@ -3896,12 +3881,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, } // Check to see if this is constant foldable. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (!isa(Ops[i])) - return nullptr; + if (!all_of(Ops, [](Value *V) { return isa(V); })) + return nullptr; - return ConstantExpr::getGetElementPtr(SrcTy, cast(Ops[0]), - Ops.slice(1)); + auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast(Ops[0]), + Ops.slice(1)); + if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL)) + return CEFolded; + return CE; } Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, @@ -4486,8 +4473,9 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, } template -static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, - const SimplifyQuery &Q, unsigned MaxRecurse) { +static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, + IterTy ArgEnd, const SimplifyQuery &Q, + unsigned MaxRecurse) { Type *Ty = V->getType(); if (PointerType *PTy = dyn_cast(Ty)) Ty = PTy->getElementType(); @@ -4506,7 +4494,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) return Ret; - if (!canConstantFoldCallTo(F)) + if (!canConstantFoldCallTo(CS, F)) return nullptr; SmallVector ConstantArgs; @@ -4518,17 +4506,18 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, ConstantArgs.push_back(C); } - return ConstantFoldCall(F, ConstantArgs, Q.TLI); + return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI); } -Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const SimplifyQuery &Q) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit); -} - -Value *llvm::SimplifyCall(Value *V, ArrayRef Args, +Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, + User::op_iterator ArgBegin, User::op_iterator ArgEnd, const SimplifyQuery &Q) { - return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit); + return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit); +} + +Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, + ArrayRef Args, const SimplifyQuery &Q) { + return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); } /// See if we can compute a simplified version of this instruction. @@ -4659,7 +4648,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, break; case Instruction::Call: { CallSite CS(cast(I)); - Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), Q); + Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index 15c757b48f76..8a87b980b0a8 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Analysis parent = Libraries -required_libraries = Core Support ProfileData Object +required_libraries = BinaryFormat Core Object ProfileData Support diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp index b51c6beb7959..e2884d0a4564 100644 --- a/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -24,6 +25,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE, "Lazy Branch Probability Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE, "Lazy Branch Probability Analysis", true, true) @@ -41,6 +43,7 @@ void LazyBranchProbabilityInfoPass::print(raw_ostream &OS, void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } @@ -48,16 +51,19 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { LoopInfo &LI = getAnalysis().getLoopInfo(); - LBPI = llvm::make_unique(&F, &LI); + TargetLibraryInfo &TLI = getAnalysis().getTLI(); + LBPI = llvm::make_unique(&F, &LI, &TLI); return false; } void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) { AU.addRequired(); AU.addRequired(); + AU.addRequired(); } void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass); INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); + INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass); } diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index eef56815f2e0..b6a9436cc1ec 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -8,10 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/Sequence.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 6a9ae6440ace..3ed61a79478a 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -302,7 +302,7 @@ static bool hasSingleValue(const LVILatticeVal &Val) { /// contradictory. If this happens, we return some valid lattice value so as /// not confuse the rest of LVI. Ideally, we'd always return Undefined, but /// we do not make this guarantee. TODO: This would be a useful enhancement. -static LVILatticeVal intersect(LVILatticeVal A, LVILatticeVal B) { +static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) { // Undefined is the strongest state. It means the value is known to be along // an unreachable path. if (A.isUndefined()) @@ -364,7 +364,6 @@ namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { - friend class LazyValueInfoAnnotatedWriter; /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. @@ -384,7 +383,6 @@ namespace { /// don't spend time removing unused blocks from our caches. DenseSet > SeenBlocks; - protected: /// This is all of the cached information for all values, /// mapped from Value* to key information. DenseMap> ValueCache; @@ -443,7 +441,6 @@ namespace { return BBI->second; } - void printCache(Function &F, raw_ostream &OS); /// clear - Empty the cache. void clear() { SeenBlocks.clear(); @@ -467,61 +464,6 @@ namespace { }; } - -namespace { - - /// An assembly annotator class to print LazyValueCache information in - /// comments. - class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { - const LazyValueInfoCache* LVICache; - - public: - LazyValueInfoAnnotatedWriter(const LazyValueInfoCache *L) : LVICache(L) {} - - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) { - auto ODI = LVICache->OverDefinedCache.find(const_cast(BB)); - if (ODI == LVICache->OverDefinedCache.end()) - return; - OS << "; OverDefined values for block are: \n"; - for (auto *V : ODI->second) - OS << ";" << *V << "\n"; - - // Find if there are latticevalues defined for arguments of the function. - auto *F = const_cast(BB->getParent()); - for (auto &Arg : F->args()) { - auto VI = LVICache->ValueCache.find_as(&Arg); - if (VI == LVICache->ValueCache.end()) - continue; - auto BBI = VI->second->BlockVals.find(const_cast(BB)); - if (BBI != VI->second->BlockVals.end()) - OS << "; CachedLatticeValue for: '" << *VI->first << "' is: '" - << BBI->second << "'\n"; - } - } - - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { - - auto VI = LVICache->ValueCache.find_as(const_cast(I)); - if (VI == LVICache->ValueCache.end()) - return; - OS << "; CachedLatticeValues for: '" << *VI->first << "'\n"; - for (auto &BV : VI->second->BlockVals) { - OS << "; at beginning of BasicBlock: '"; - BV.first->printAsOperand(OS, false); - OS << "' LatticeVal: '" << BV.second << "' \n"; - } - } -}; -} - -void LazyValueInfoCache::printCache(Function &F, raw_ostream &OS) { - LazyValueInfoAnnotatedWriter Writer(this); - F.print(OS, &Writer); - -} - void LazyValueInfoCache::eraseValue(Value *V) { for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) { // Copy and increment the iterator immediately so we can erase behind @@ -615,6 +557,30 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, } } + +namespace { +/// An assembly annotator class to print LazyValueCache information in +/// comments. +class LazyValueInfoImpl; +class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { + LazyValueInfoImpl *LVIImpl; + // While analyzing which blocks we can solve values for, we need the dominator + // information. Since this is an optional parameter in LVI, we require this + // DomTreeAnalysis pass in the printer pass, and pass the dominator + // tree to the LazyValueInfoAnnotatedWriter. + DominatorTree &DT; + +public: + LazyValueInfoAnnotatedWriter(LazyValueInfoImpl *L, DominatorTree &DTree) + : LVIImpl(L), DT(DTree) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS); + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS); +}; +} namespace { // The actual implementation of the lazy analysis and update. Note that the // inheritance from LazyValueInfoCache is intended to be temporary while @@ -693,9 +659,10 @@ namespace { TheCache.clear(); } - /// Printing the LazyValueInfoCache. - void printCache(Function &F, raw_ostream &OS) { - TheCache.printCache(F, OS); + /// Printing the LazyValueInfo Analysis. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { + LazyValueInfoAnnotatedWriter Writer(this, DTree); + F.print(OS, &Writer); } /// This is part of the update interface to inform the cache @@ -714,6 +681,7 @@ namespace { }; } // end anonymous namespace + void LazyValueInfoImpl::solve() { SmallVector, 8> StartingStack( BlockValueStack.begin(), BlockValueStack.end()); @@ -838,7 +806,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, // that for all other pointer typed values, we terminate the search at the // definition. We could easily extend this to look through geps, bitcasts, // and the like to prove non-nullness, but it's not clear that's worth it - // compile time wise. The context-insensative value walk done inside + // compile time wise. The context-insensitive value walk done inside // isKnownNonNull gets most of the profitable cases at much less expense. // This does mean that we have a sensativity to where the defining // instruction is placed, even if it could legally be hoisted much higher. @@ -1693,63 +1661,62 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, } static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, - LVILatticeVal &Result, + const LVILatticeVal &Val, const DataLayout &DL, TargetLibraryInfo *TLI) { // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; - if (Result.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, - TLI); + if (Val.isConstant()) { + Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI); if (ConstantInt *ResCI = dyn_cast(Res)) return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; } - if (Result.isConstantRange()) { + if (Val.isConstantRange()) { ConstantInt *CI = dyn_cast(C); if (!CI) return LazyValueInfo::Unknown; - const ConstantRange &CR = Result.getConstantRange(); + const ConstantRange &CR = Val.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) return LazyValueInfo::False; - if (CR.isSingleElement() && CR.contains(CI->getValue())) + if (CR.isSingleElement()) return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) return LazyValueInfo::True; - if (CR.isSingleElement() && CR.contains(CI->getValue())) + if (CR.isSingleElement()) + return LazyValueInfo::False; + } else { + // Handle more complex predicates. + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( + (ICmpInst::Predicate)Pred, CI->getValue()); + if (TrueValues.contains(CR)) + return LazyValueInfo::True; + if (TrueValues.inverse().contains(CR)) return LazyValueInfo::False; } - - // Handle more complex predicates. - ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( - (ICmpInst::Predicate)Pred, CI->getValue()); - if (TrueValues.contains(CR)) - return LazyValueInfo::True; - if (TrueValues.inverse().contains(CR)) - return LazyValueInfo::False; return LazyValueInfo::Unknown; } - if (Result.isNotConstant()) { + if (Val.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1". if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, DL, + Val.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, DL, + Val.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::True; @@ -1890,12 +1857,65 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) { } -void LazyValueInfo::printCache(Function &F, raw_ostream &OS) { +void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { if (PImpl) { - getImpl(PImpl, AC, DL, DT).printCache(F, OS); + getImpl(PImpl, AC, DL, DT).printLVI(F, DTree, OS); } } +// Print the LVI for the function arguments at the start of each basic block. +void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( + const BasicBlock *BB, formatted_raw_ostream &OS) { + // Find if there are latticevalues defined for arguments of the function. + auto *F = BB->getParent(); + for (auto &Arg : F->args()) { + LVILatticeVal Result = LVIImpl->getValueInBlock( + const_cast(&Arg), const_cast(BB)); + if (Result.isUndefined()) + continue; + OS << "; LatticeVal for: '" << Arg << "' is: " << Result << "\n"; + } +} + +// This function prints the LVI analysis for the instruction I at the beginning +// of various basic blocks. It relies on calculated values that are stored in +// the LazyValueInfoCache, and in the absence of cached values, recalculte the +// LazyValueInfo for `I`, and print that info. +void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( + const Instruction *I, formatted_raw_ostream &OS) { + + auto *ParentBB = I->getParent(); + SmallPtrSet BlocksContainingLVI; + // We can generate (solve) LVI values only for blocks that are dominated by + // the I's parent. However, to avoid generating LVI for all dominating blocks, + // that contain redundant/uninteresting information, we print LVI for + // blocks that may use this LVI information (such as immediate successor + // blocks, and blocks that contain uses of `I`). + auto printResult = [&](const BasicBlock *BB) { + if (!BlocksContainingLVI.insert(BB).second) + return; + LVILatticeVal Result = LVIImpl->getValueInBlock( + const_cast(I), const_cast(BB)); + OS << "; LatticeVal for: '" << *I << "' in BB: '"; + BB->printAsOperand(OS, false); + OS << "' is: " << Result << "\n"; + }; + + printResult(ParentBB); + // Print the LVI analysis results for the the immediate successor blocks, that + // are dominated by `ParentBB`. + for (auto *BBSucc : successors(ParentBB)) + if (DT.dominates(ParentBB, BBSucc)) + printResult(BBSucc); + + // Print LVI in blocks where `I` is used. + for (auto *U : I->users()) + if (auto *UseI = dyn_cast(U)) + if (!isa(UseI) || DT.dominates(ParentBB, UseI->getParent())) + printResult(UseI->getParent()); + +} + namespace { // Printer class for LazyValueInfo results. class LazyValueInfoPrinter : public FunctionPass { @@ -1908,12 +1928,16 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired(); + AU.addRequired(); } + // Get the mandatory dominator tree analysis and pass this in to the + // LVIPrinter. We cannot rely on the LVI's DT, since it's optional. bool runOnFunction(Function &F) override { dbgs() << "LVI for function '" << F.getName() << "':\n"; auto &LVI = getAnalysis().getLVI(); - LVI.printCache(F, dbgs()); + auto &DTree = getAnalysis().getDomTree(); + LVI.printLVI(F, DTree, dbgs()); return false; } }; diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index e6391792bc23..9713588537b3 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -58,13 +58,13 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index e7a85ae06e68..5c0cbb26484c 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp index fa0cc5a46c2b..4231a78352ce 100644 --- a/lib/Analysis/MemDerefPrinter.cpp +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 188885063b39..3fdedbb0ab3c 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -15,17 +15,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/OrderedBasicBlock.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp index da5c79ab6c81..1ff84471c094 100644 --- a/lib/Analysis/MemorySSAUpdater.cpp +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" @@ -24,7 +25,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Analysis/MemorySSA.h" #include #define DEBUG_TYPE "memoryssa" @@ -124,17 +124,12 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) { return &*Iter; } else { // Otherwise, have to walk the all access iterator. - auto Iter = MA->getReverseIterator(); - ++Iter; - while (&*Iter != &*Defs->begin()) { - if (!isa(*Iter)) - return &*Iter; - --Iter; - } - // At this point it must be pointing at firstdef - assert(&*Iter == &*Defs->begin() && - "Should have hit first def walking backwards"); - return &*Iter; + auto End = MSSA->getWritableBlockAccesses(MA->getBlock())->rend(); + for (auto &U : make_range(++MA->getReverseIterator(), End)) + if (!isa(U)) + return cast(&U); + // Note that if MA comes before Defs->begin(), we won't hit a def. + return nullptr; } } return nullptr; diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index f675830aa67d..e12cdf9182c7 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 3253f27c010d..095647e1bd20 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -447,6 +447,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( }); } + bool IsThinLTO = true; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) + IsThinLTO = MD->getZExtValue(); + for (auto &GlobalList : Index) { // Ignore entries for references that are undefined in the current module. if (GlobalList.second.SummaryList.empty()) @@ -455,6 +460,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( assert(GlobalList.second.SummaryList.size() == 1 && "Expected module's index to have one summary per GUID"); auto &Summary = GlobalList.second.SummaryList[0]; + if (!IsThinLTO) { + Summary->setNotEligibleToImport(); + continue; + } + bool AllRefsCanBeExternallyReferenced = llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) { return !CantBePromoted.count(VI.getGUID()); diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp index 1e75c0824d03..f374dd33f86f 100644 --- a/lib/Analysis/ObjCARCInstKind.cpp +++ b/lib/Analysis/ObjCARCInstKind.cpp @@ -20,8 +20,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ObjCARCInstKind.h" -#include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 30a4e011060e..5986b8c4e0c3 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -9,14 +9,14 @@ // Print out the region tree of a function using dotty/graphviz. //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/RegionPrinter.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/RegionPrinter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index d96697cafbe9..b9c4716b5528 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -91,8 +91,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -6793,7 +6793,7 @@ static bool CanConstantFold(const Instruction *I) { if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) - return canConstantFoldCallTo(F); + return canConstantFoldCallTo(CI, F); return false; } diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 54c44c8e542d..3740039b8f86 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionNormalization.h" using namespace llvm; /// TransformKind - Different types of transformations that diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index ac646716476b..488cb332a0b0 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -133,6 +133,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, AddrSpace); } +bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { + return TTIImpl->isLSRCostLess(C1, C2); +} + bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { return TTIImpl->isLegalMaskedStore(DataType); } @@ -464,6 +468,10 @@ bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, return TTIImpl->getTgtMemIntrinsic(Inst, Info); } +unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const { + return TTIImpl->getAtomicMemIntrinsicMaxElementSize(); +} + Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( IntrinsicInst *Inst, Type *ExpectedType) const { return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a5dceb6c2271..c0181662fd9d 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" @@ -1982,7 +1982,7 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) { /// Return true if it is known that V1 != V2. static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { - if (V1->getType()->isVectorTy() || V1 == V2) + if (V1 == V2) return false; if (V1->getType() != V2->getType()) // We can't look through casts yet. @@ -1990,18 +1990,14 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q)) return true; - if (IntegerType *Ty = dyn_cast(V1->getType())) { + if (V1->getType()->isIntOrIntVectorTy()) { // Are any known bits in V1 contradictory to known bits in V2? If V1 // has a known zero where V2 has a known one, they must not be equal. - auto BitWidth = Ty->getBitWidth(); - KnownBits Known1(BitWidth); - computeKnownBits(V1, Known1, 0, Q); - KnownBits Known2(BitWidth); - computeKnownBits(V2, Known2, 0, Q); + KnownBits Known1 = computeKnownBits(V1, 0, Q); + KnownBits Known2 = computeKnownBits(V2, 0, Q); - APInt OppositeBits = (Known1.Zero & Known2.One) | - (Known2.Zero & Known1.One); - if (OppositeBits.getBoolValue()) + if (Known1.Zero.intersects(Known2.One) || + Known2.Zero.intersects(Known1.One)) return true; } return false; @@ -3082,7 +3078,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, Str = StringRef("", 1); return true; } - // We cannot instantiate a StringRef as we do not have an apropriate string + // We cannot instantiate a StringRef as we do not have an appropriate string // of 0s at hand. return false; } diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 2d2249da4e13..0ace8fa382bc 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -11,19 +11,19 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/IRBuilder.h" using namespace llvm; using namespace llvm::PatternMatch; diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index ff1ea44a18a7..9ad31125f4b8 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -15,9 +15,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" @@ -41,7 +42,6 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SaveAndRestore.h" diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt index 3bc31ed910a7..82dba8c15bb8 100644 --- a/lib/AsmParser/LLVMBuild.txt +++ b/lib/AsmParser/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmParser parent = Libraries -required_libraries = Core Support +required_libraries = BinaryFormat Core Support diff --git a/lib/BinaryFormat/CMakeLists.txt b/lib/BinaryFormat/CMakeLists.txt new file mode 100644 index 000000000000..cb78ea6fdf92 --- /dev/null +++ b/lib/BinaryFormat/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMBinaryFormat + Dwarf.cpp + Magic.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat + ) + \ No newline at end of file diff --git a/lib/Support/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp similarity index 77% rename from lib/Support/Dwarf.cpp rename to lib/BinaryFormat/Dwarf.cpp index 200546857de7..37c4579ef0f8 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/BinaryFormat/Dwarf.cpp @@ -1,4 +1,4 @@ -//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===// +//===-- llvm/BinaryFormat/Dwarf.cpp - Dwarf Framework ------------*- C++-*-===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" @@ -25,15 +25,15 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch(TagString) #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ - .Case("DW_TAG_" #NAME, DW_TAG_##NAME) -#include "llvm/Support/Dwarf.def" + .Case("DW_TAG_" #NAME, DW_TAG_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Default(DW_TAG_invalid); } @@ -44,7 +44,7 @@ unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -55,14 +55,16 @@ unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { - case DW_CHILDREN_no: return "DW_CHILDREN_no"; - case DW_CHILDREN_yes: return "DW_CHILDREN_yes"; + case DW_CHILDREN_no: + return "DW_CHILDREN_no"; + case DW_CHILDREN_yes: + return "DW_CHILDREN_yes"; } return StringRef(); } @@ -74,7 +76,7 @@ StringRef llvm::dwarf::AttributeString(unsigned Attribute) { #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ case DW_AT_##NAME: \ return "DW_AT_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -85,7 +87,7 @@ unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) { #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ case DW_AT_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -96,7 +98,7 @@ unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) { #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ case DW_AT_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -107,7 +109,7 @@ StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) { #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ case DW_FORM_##NAME: \ return "DW_FORM_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -118,7 +120,7 @@ unsigned llvm::dwarf::FormVersion(dwarf::Form Form) { #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ case DW_FORM_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -129,7 +131,7 @@ unsigned llvm::dwarf::FormVendor(dwarf::Form Form) { #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ case DW_FORM_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -140,7 +142,7 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return "DW_OP_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" case DW_OP_LLVM_fragment: return "DW_OP_LLVM_fragment"; } @@ -149,8 +151,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { return StringSwitch(OperationEncodingString) #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ - .Case("DW_OP_" #NAME, DW_OP_##NAME) -#include "llvm/Support/Dwarf.def" + .Case("DW_OP_" #NAME, DW_OP_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment) .Default(0); } @@ -162,7 +164,7 @@ unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) { #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -173,7 +175,7 @@ unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) { #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -184,15 +186,15 @@ StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return "DW_ATE_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) { return StringSwitch(EncodingString) #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ - .Case("DW_ATE_" #NAME, DW_ATE_##NAME) -#include "llvm/Support/Dwarf.def" + .Case("DW_ATE_" #NAME, DW_ATE_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } @@ -203,7 +205,7 @@ unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -214,28 +216,38 @@ unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { - case DW_DS_unsigned: return "DW_DS_unsigned"; - case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch"; - case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch"; - case DW_DS_leading_separate: return "DW_DS_leading_separate"; - case DW_DS_trailing_separate: return "DW_DS_trailing_separate"; + case DW_DS_unsigned: + return "DW_DS_unsigned"; + case DW_DS_leading_overpunch: + return "DW_DS_leading_overpunch"; + case DW_DS_trailing_overpunch: + return "DW_DS_trailing_overpunch"; + case DW_DS_leading_separate: + return "DW_DS_leading_separate"; + case DW_DS_trailing_separate: + return "DW_DS_trailing_separate"; } return StringRef(); } StringRef llvm::dwarf::EndianityString(unsigned Endian) { switch (Endian) { - case DW_END_default: return "DW_END_default"; - case DW_END_big: return "DW_END_big"; - case DW_END_little: return "DW_END_little"; - case DW_END_lo_user: return "DW_END_lo_user"; - case DW_END_hi_user: return "DW_END_hi_user"; + case DW_END_default: + return "DW_END_default"; + case DW_END_big: + return "DW_END_big"; + case DW_END_little: + return "DW_END_little"; + case DW_END_lo_user: + return "DW_END_lo_user"; + case DW_END_hi_user: + return "DW_END_hi_user"; } return StringRef(); } @@ -243,18 +255,24 @@ StringRef llvm::dwarf::EndianityString(unsigned Endian) { StringRef llvm::dwarf::AccessibilityString(unsigned Access) { switch (Access) { // Accessibility codes - case DW_ACCESS_public: return "DW_ACCESS_public"; - case DW_ACCESS_protected: return "DW_ACCESS_protected"; - case DW_ACCESS_private: return "DW_ACCESS_private"; + case DW_ACCESS_public: + return "DW_ACCESS_public"; + case DW_ACCESS_protected: + return "DW_ACCESS_protected"; + case DW_ACCESS_private: + return "DW_ACCESS_private"; } return StringRef(); } StringRef llvm::dwarf::VisibilityString(unsigned Visibility) { switch (Visibility) { - case DW_VIS_local: return "DW_VIS_local"; - case DW_VIS_exported: return "DW_VIS_exported"; - case DW_VIS_qualified: return "DW_VIS_qualified"; + case DW_VIS_local: + return "DW_VIS_local"; + case DW_VIS_exported: + return "DW_VIS_exported"; + case DW_VIS_qualified: + return "DW_VIS_qualified"; } return StringRef(); } @@ -266,7 +284,7 @@ StringRef llvm::dwarf::VirtualityString(unsigned Virtuality) { #define HANDLE_DW_VIRTUALITY(ID, NAME) \ case DW_VIRTUALITY_##NAME: \ return "DW_VIRTUALITY_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -274,7 +292,7 @@ unsigned llvm::dwarf::getVirtuality(StringRef VirtualityString) { return StringSwitch(VirtualityString) #define HANDLE_DW_VIRTUALITY(ID, NAME) \ .Case("DW_VIRTUALITY_" #NAME, DW_VIRTUALITY_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(DW_VIRTUALITY_invalid); } @@ -285,7 +303,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) { #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return "DW_LANG_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -293,7 +311,7 @@ unsigned llvm::dwarf::getLanguage(StringRef LanguageString) { return StringSwitch(LanguageString) #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ .Case("DW_LANG_" #NAME, DW_LANG_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } @@ -304,7 +322,7 @@ unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) { #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return VERSION; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -315,16 +333,20 @@ unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) { #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return DWARF_VENDOR_##VENDOR; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::CaseString(unsigned Case) { switch (Case) { - case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; - case DW_ID_up_case: return "DW_ID_up_case"; - case DW_ID_down_case: return "DW_ID_down_case"; - case DW_ID_case_insensitive: return "DW_ID_case_insensitive"; + case DW_ID_case_sensitive: + return "DW_ID_case_sensitive"; + case DW_ID_up_case: + return "DW_ID_up_case"; + case DW_ID_down_case: + return "DW_ID_down_case"; + case DW_ID_case_insensitive: + return "DW_ID_case_insensitive"; } return StringRef(); } @@ -333,42 +355,50 @@ StringRef llvm::dwarf::ConventionString(unsigned CC) { switch (CC) { default: return StringRef(); -#define HANDLE_DW_CC(ID, NAME) \ - case DW_CC_##NAME: \ +#define HANDLE_DW_CC(ID, NAME) \ + case DW_CC_##NAME: \ return "DW_CC_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getCallingConvention(StringRef CCString) { return StringSwitch(CCString) #define HANDLE_DW_CC(ID, NAME) .Case("DW_CC_" #NAME, DW_CC_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } StringRef llvm::dwarf::InlineCodeString(unsigned Code) { switch (Code) { - case DW_INL_not_inlined: return "DW_INL_not_inlined"; - case DW_INL_inlined: return "DW_INL_inlined"; - case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined"; - case DW_INL_declared_inlined: return "DW_INL_declared_inlined"; + case DW_INL_not_inlined: + return "DW_INL_not_inlined"; + case DW_INL_inlined: + return "DW_INL_inlined"; + case DW_INL_declared_not_inlined: + return "DW_INL_declared_not_inlined"; + case DW_INL_declared_inlined: + return "DW_INL_declared_inlined"; } return StringRef(); } StringRef llvm::dwarf::ArrayOrderString(unsigned Order) { switch (Order) { - case DW_ORD_row_major: return "DW_ORD_row_major"; - case DW_ORD_col_major: return "DW_ORD_col_major"; + case DW_ORD_row_major: + return "DW_ORD_row_major"; + case DW_ORD_col_major: + return "DW_ORD_col_major"; } return StringRef(); } StringRef llvm::dwarf::DiscriminantString(unsigned Discriminant) { switch (Discriminant) { - case DW_DSC_label: return "DW_DSC_label"; - case DW_DSC_range: return "DW_DSC_range"; + case DW_DSC_label: + return "DW_DSC_label"; + case DW_DSC_range: + return "DW_DSC_range"; } return StringRef(); } @@ -377,10 +407,10 @@ StringRef llvm::dwarf::LNStandardString(unsigned Standard) { switch (Standard) { default: return StringRef(); -#define HANDLE_DW_LNS(ID, NAME) \ - case DW_LNS_##NAME: \ +#define HANDLE_DW_LNS(ID, NAME) \ + case DW_LNS_##NAME: \ return "DW_LNS_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -388,22 +418,28 @@ StringRef llvm::dwarf::LNExtendedString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_LNE(ID, NAME) \ - case DW_LNE_##NAME: \ +#define HANDLE_DW_LNE(ID, NAME) \ + case DW_LNE_##NAME: \ return "DW_LNE_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::MacinfoString(unsigned Encoding) { switch (Encoding) { // Macinfo Type Encodings - case DW_MACINFO_define: return "DW_MACINFO_define"; - case DW_MACINFO_undef: return "DW_MACINFO_undef"; - case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; - case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; - case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; - case DW_MACINFO_invalid: return "DW_MACINFO_invalid"; + case DW_MACINFO_define: + return "DW_MACINFO_define"; + case DW_MACINFO_undef: + return "DW_MACINFO_undef"; + case DW_MACINFO_start_file: + return "DW_MACINFO_start_file"; + case DW_MACINFO_end_file: + return "DW_MACINFO_end_file"; + case DW_MACINFO_vendor_ext: + return "DW_MACINFO_vendor_ext"; + case DW_MACINFO_invalid: + return "DW_MACINFO_invalid"; } return StringRef(); } @@ -422,10 +458,10 @@ StringRef llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_CFA(ID, NAME) \ - case DW_CFA_##NAME: \ +#define HANDLE_DW_CFA(ID, NAME) \ + case DW_CFA_##NAME: \ return "DW_CFA_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -433,10 +469,10 @@ StringRef llvm::dwarf::ApplePropertyString(unsigned Prop) { switch (Prop) { default: return StringRef(); -#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) \ - case DW_APPLE_PROPERTY_##NAME: \ +#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) \ + case DW_APPLE_PROPERTY_##NAME: \ return "DW_APPLE_PROPERTY_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -447,7 +483,7 @@ StringRef llvm::dwarf::UnitTypeString(unsigned UT) { #define HANDLE_DW_UT(ID, NAME) \ case DW_UT_##NAME: \ return "DW_UT_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } diff --git a/lib/BinaryFormat/LLVMBuild.txt b/lib/BinaryFormat/LLVMBuild.txt new file mode 100644 index 000000000000..d7d4dcb5f23d --- /dev/null +++ b/lib/BinaryFormat/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/BinaryFormat/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = BinaryFormat +parent = Libraries +required_libraries = Support diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp new file mode 100644 index 000000000000..ca4d93f99d92 --- /dev/null +++ b/lib/BinaryFormat/Magic.cpp @@ -0,0 +1,216 @@ +//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/Magic.h" + +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileSystem.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include +#else +#include +#endif + +using namespace llvm; +using namespace llvm::support::endian; +using namespace llvm::sys::fs; + +template +static bool startswith(StringRef Magic, const char (&S)[N]) { + return Magic.startswith(StringRef(S, N - 1)); +} + +/// @brief Identify the magic in magic. +file_magic llvm::identify_magic(StringRef Magic) { + if (Magic.size() < 4) + return file_magic::unknown; + switch ((unsigned char)Magic[0]) { + case 0x00: { + // COFF bigobj, CL.exe's LTO object file, or short import library file + if (startswith(Magic, "\0\0\xFF\xFF")) { + size_t MinSize = + offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); + if (Magic.size() < MinSize) + return file_magic::coff_import_library; + + const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); + if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0) + return file_magic::coff_object; + if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0) + return file_magic::coff_cl_gl_object; + return file_magic::coff_import_library; + } + // Windows resource file + if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF")) + return file_magic::windows_resource; + // 0x0000 = COFF unknown machine type + if (Magic[1] == 0) + return file_magic::coff_object; + if (startswith(Magic, "\0asm")) + return file_magic::wasm_object; + break; + } + case 0xDE: // 0x0B17C0DE = BC wraper + if (startswith(Magic, "\xDE\xC0\x17\x0B")) + return file_magic::bitcode; + break; + case 'B': + if (startswith(Magic, "BC\xC0\xDE")) + return file_magic::bitcode; + break; + case '!': + if (startswith(Magic, "!\n") || startswith(Magic, "!\n")) + return file_magic::archive; + break; + + case '\177': + if (startswith(Magic, "\177ELF") && Magic.size() >= 18) { + bool Data2MSB = Magic[5] == 2; + unsigned high = Data2MSB ? 16 : 17; + unsigned low = Data2MSB ? 17 : 16; + if (Magic[high] == 0) { + switch (Magic[low]) { + default: + return file_magic::elf; + case 1: + return file_magic::elf_relocatable; + case 2: + return file_magic::elf_executable; + case 3: + return file_magic::elf_shared_object; + case 4: + return file_magic::elf_core; + } + } + // It's still some type of ELF file. + return file_magic::elf; + } + break; + + case 0xCA: + if (startswith(Magic, "\xCA\xFE\xBA\xBE") || + startswith(Magic, "\xCA\xFE\xBA\xBF")) { + // This is complicated by an overlap with Java class files. + // See the Mach-O section in /usr/share/file/magic for details. + if (Magic.size() >= 8 && Magic[7] < 43) + return file_magic::macho_universal_binary; + } + break; + + // The two magic numbers for mach-o are: + // 0xfeedface - 32-bit mach-o + // 0xfeedfacf - 64-bit mach-o + case 0xFE: + case 0xCE: + case 0xCF: { + uint16_t type = 0; + if (startswith(Magic, "\xFE\xED\xFA\xCE") || + startswith(Magic, "\xFE\xED\xFA\xCF")) { + /* Native endian */ + size_t MinSize; + if (Magic[3] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; + } else if (startswith(Magic, "\xCE\xFA\xED\xFE") || + startswith(Magic, "\xCF\xFA\xED\xFE")) { + /* Reverse endian */ + size_t MinSize; + if (Magic[0] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12]; + } + switch (type) { + default: + break; + case 1: + return file_magic::macho_object; + case 2: + return file_magic::macho_executable; + case 3: + return file_magic::macho_fixed_virtual_memory_shared_lib; + case 4: + return file_magic::macho_core; + case 5: + return file_magic::macho_preload_executable; + case 6: + return file_magic::macho_dynamically_linked_shared_lib; + case 7: + return file_magic::macho_dynamic_linker; + case 8: + return file_magic::macho_bundle; + case 9: + return file_magic::macho_dynamically_linked_shared_lib_stub; + case 10: + return file_magic::macho_dsym_companion; + case 11: + return file_magic::macho_kext_bundle; + } + break; + } + case 0xF0: // PowerPC Windows + case 0x83: // Alpha 32-bit + case 0x84: // Alpha 64-bit + case 0x66: // MPS R4000 Windows + case 0x50: // mc68K + case 0x4c: // 80386 Windows + case 0xc4: // ARMNT Windows + if (Magic[1] == 0x01) + return file_magic::coff_object; + LLVM_FALLTHROUGH; + + case 0x90: // PA-RISC Windows + case 0x68: // mc68K Windows + if (Magic[1] == 0x02) + return file_magic::coff_object; + break; + + case 'M': // Possible MS-DOS stub on Windows PE file + if (startswith(Magic, "MZ")) { + uint32_t off = read32le(Magic.data() + 0x3c); + // PE/COFF file, either EXE or DLL. + if (off < Magic.size() && + memcmp(Magic.data() + off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) + return file_magic::pecoff_executable; + } + break; + + case 0x64: // x86-64 Windows. + if (Magic[1] == char(0x86)) + return file_magic::coff_object; + break; + + default: + break; + } + return file_magic::unknown; +} + +std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) { + int FD; + if (std::error_code EC = openFileForRead(Path, FD)) + return EC; + + char Buffer[32]; + int Length = read(FD, Buffer, sizeof(Buffer)); + if (close(FD) != 0 || Length < 0) + return std::error_code(errno, std::generic_category()); + + Result = identify_magic(StringRef(Buffer, Length)); + return std::error_code(); +} diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index fffa9045b2fd..95987fac74e1 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -28,8 +28,8 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -40,13 +40,13 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" @@ -5370,12 +5370,20 @@ static Expected readStrtab(BitstreamCursor &Stream) { Expected> llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { + auto FOrErr = getBitcodeFileContents(Buffer); + if (!FOrErr) + return FOrErr.takeError(); + return std::move(FOrErr->Mods); +} + +Expected +llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { Expected StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); BitstreamCursor &Stream = *StreamOrErr; - std::vector Modules; + BitcodeFileContents F; while (true) { uint64_t BCBegin = Stream.getCurrentByteNo(); @@ -5383,7 +5391,7 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to // the end that there cannot possibly be another module, stop looking. if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) - return Modules; + return F; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { @@ -5409,10 +5417,10 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { if (Stream.SkipBlock()) return error("Malformed block"); - Modules.push_back({Stream.getBitcodeBytes().slice( - BCBegin, Stream.getCurrentByteNo() - BCBegin), - Buffer.getBufferIdentifier(), IdentificationBit, - ModuleBit}); + F.Mods.push_back({Stream.getBitcodeBytes().slice( + BCBegin, Stream.getCurrentByteNo() - BCBegin), + Buffer.getBufferIdentifier(), IdentificationBit, + ModuleBit}); continue; } @@ -5424,7 +5432,7 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { // not have its own string table. A bitcode file may have multiple // string tables if it was created by binary concatenation, for example // with "llvm-cat -b". - for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) { + for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) { if (!I->Strtab.empty()) break; I->Strtab = *Strtab; diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index d80e1da911ca..ee2fe2a0cc18 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -53,8 +53,8 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 9043b8c12d25..d5879fec95cb 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3305,7 +3305,15 @@ static const uint64_t INDEX_VERSION = 3; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { - Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4); + // By default we compile with ThinLTO if the module has a summary, but the + // client can request full LTO with a module flag. + bool IsThinLTO = true; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) + IsThinLTO = MD->getZExtValue(); + Stream.EnterSubblock(IsThinLTO ? bitc::GLOBALVAL_SUMMARY_BLOCK_ID + : bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, + 4); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef{INDEX_VERSION}); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 73fc2b35fe4e..f7c09be15fb7 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(IR) add_subdirectory(IRReader) add_subdirectory(CodeGen) +add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) add_subdirectory(Transforms) add_subdirectory(Linker) diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 09a37a77e9fb..c2aecc651b79 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -24,8 +24,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 61149d9229b7..8b1376ab363d 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -14,6 +14,7 @@ #include "DwarfException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index e61e22abe82a..407d5623d670 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/AsmPrinter.h" #include "AsmPrinterHandler.h" #include "CodeViewDebug.h" #include "DwarfDebug.h" @@ -19,18 +20,19 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" @@ -82,14 +84,12 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 0185c380cc39..0edf9051d342 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -15,6 +15,7 @@ #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index a0bf1632dff3..eae79ad101d3 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 385c78bbccef..e94616fd5900 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1,4 +1,4 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===// // // The LLVM Compiler Infrastructure // @@ -12,37 +12,82 @@ //===----------------------------------------------------------------------===// #include "CodeViewDebug.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Config/llvm-config.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableCollection.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::codeview; CodeViewDebug::CodeViewDebug(AsmPrinter *AP) - : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(), - TypeTable(Allocator), CurFn(nullptr) { + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || @@ -178,7 +223,8 @@ static const DISubprogram *getQualifiedNameComponents( static std::string getQualifiedName(ArrayRef QualifiedNameComponents, StringRef TypeName) { std::string FullyQualifiedName; - for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) { + for (StringRef QualifiedNameComponent : + llvm::reverse(QualifiedNameComponents)) { FullyQualifiedName.append(QualifiedNameComponent); FullyQualifiedName.append("::"); } @@ -571,7 +617,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) { } } -} // anonymous namespace +} // end anonymous namespace void CodeViewDebug::emitCompilerInformation() { MCContext &Context = MMI->getContext(); @@ -1581,11 +1627,11 @@ struct llvm::ClassInfo { uint64_t BaseOffset; }; // [MemberInfo] - typedef std::vector MemberList; + using MemberList = std::vector; - typedef TinyPtrVector MethodsList; + using MethodsList = TinyPtrVector; // MethodName -> MethodsList - typedef MapVector MethodsMap; + using MethodsMap = MapVector; /// Base classes. std::vector Inheritance; @@ -1850,7 +1896,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { translateMethodOptionFlags(SP), VFTableOffset, Name)); MemberCount++; } - assert(Methods.size() > 0 && "Empty methods map entry"); + assert(!Methods.empty() && "Empty methods map entry"); if (Methods.size() == 1) FLBR.writeMemberType(Methods[0]); else { diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 1c0c1644edaf..2cd495aec6dc 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -1,4 +1,4 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===// +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,29 +14,44 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H +#include "DbgValueHistoryCalculator.h" #include "DebugHandlerBase.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" +#include +#include +#include +#include +#include +#include +#include namespace llvm { -class StringRef; -class LexicalScope; struct ClassInfo; +class StringRef; +class AsmPrinter; +class Function; +class GlobalVariable; +class MCSectionCOFF; +class MCStreamer; +class MCSymbol; +class MachineFunction; /// \brief Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; - llvm::BumpPtrAllocator Allocator; + BumpPtrAllocator Allocator; codeview::TypeTableBuilder TypeTable; /// Represents the most general definition range. @@ -110,7 +125,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { unsigned LastFileId = 0; bool HaveLineInfo = false; }; - FunctionInfo *CurFn; + FunctionInfo *CurFn = nullptr; /// The set of comdat .debug$S sections that we've seen so far. Each section /// must start with a magic version number that must only be emitted once. @@ -176,8 +191,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { std::vector> LocalUDTs, GlobalUDTs; - typedef std::map FileToFilepathMapTy; + using FileToFilepathMapTy = std::map; FileToFilepathMapTy FileToFilepathMap; + StringRef getFullFilepath(const DIFile *S); unsigned maybeRecordFile(const DIFile *F); @@ -223,7 +239,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, const InlineSite &Site); - typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + using InlinedVariable = DbgValueHistoryMap::InlinedVariable; void collectVariableInfo(const DISubprogram *SP); @@ -309,7 +325,7 @@ protected: public: CodeViewDebug(AsmPrinter *Asm); - void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} + void setSymbolSize(const MCSymbol *, uint64_t) override {} /// \brief Emit the COFF section that holds the line table information. void endModule() override; @@ -317,6 +333,7 @@ public: /// \brief Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; }; -} // End of namespace llvm -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index 201030f0ac5c..15ade3c96dfe 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "ByteStreamer.h" #include "DIEHash.h" +#include "ByteStreamer.h" #include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h index 3656e9d95099..0c551dfff9cc 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -10,9 +10,9 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H +#include "ByteStreamer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "ByteStreamer.h" namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 05ac1cb02f76..b1ef8cfe989d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -16,12 +16,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index e08306b001fb..dd7f7931b06b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -14,6 +14,7 @@ #include "DwarfException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -28,7 +29,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index b8f57472f17c..3c2fb8d99db7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,8 +15,8 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DwarfUnit.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/Support/Dwarf.h" namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index bf27516e1ccd..e3fd21a1fd70 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -38,7 +39,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index ccd326917bfd..d96479f43433 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -14,8 +14,8 @@ #include "DwarfExpression.h" #include "DwarfDebug.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 667afbb450bd..7f7d3e650e02 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -18,19 +18,19 @@ #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 342efc3611c7..c5795559fb7d 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" @@ -25,8 +26,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt index 2bb66d12f376..bde8148d259b 100644 --- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmPrinter parent = Libraries -required_libraries = Analysis CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target +required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8baee4db772e..035f1a0063aa 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -1,4 +1,4 @@ -//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===// +//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===// // // The LLVM Compiler Infrastructure // @@ -11,23 +11,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCs.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include +#include +#include +#include + using namespace llvm; namespace { @@ -37,7 +41,8 @@ public: void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; }; -} + +} // end anonymous namespace static GCMetadataPrinterRegistry::Add Y("ocaml", "ocaml 3.10-compatible collector"); @@ -50,7 +55,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { std::string SymName; SymName += "caml"; size_t Letter = SymName.size(); - SymName.append(MId.begin(), find(MId, '.')); + SymName.append(MId.begin(), llvm::find(MId, '.')); SymName += "__"; SymName += Id; diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 815658bfb637..5d485f213573 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -14,6 +14,8 @@ #include "WinException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,8 +31,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index d3fced436b68..be93ff0dad29 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,10 +15,10 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 03ceac10beec..530954976292 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1,4 +1,4 @@ -//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===// +//===- BranchFolding.cpp - Fold machine code branch instructions ----------===// // // The LLVM Compiler Infrastructure // @@ -18,30 +18,46 @@ //===----------------------------------------------------------------------===// #include "BranchFolding.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "branch-folder" @@ -69,10 +85,12 @@ TailMergeSize("tail-merge-size", cl::init(3), cl::Hidden); namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. class BranchFolderPass : public MachineFunctionPass { public: static char ID; + explicit BranchFolderPass(): MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -84,7 +102,8 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char BranchFolderPass::ID = 0; char &llvm::BranchFolderPassID = BranchFolderPass::ID; @@ -368,7 +387,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = CurMBB.getIterator(); - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -506,7 +525,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, MachineBasicBlock::iterator &I) { I = MBB->end(); unsigned NumTerms = 0; - for (;;) { + while (true) { if (I == MBB->begin()) { I = MBB->end(); break; @@ -1601,7 +1620,6 @@ ReoptimizeBlock: // block doesn't fall through into some other block, see if we can find a // place to move this block where a fall-through will happen. if (!PrevBB.canFallThrough()) { - // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index e3de61c7816f..27ee12c4c5ff 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp index e4eab8c513d9..abac555d6602 100644 --- a/lib/CodeGen/BuiltinGCs.cpp +++ b/lib/CodeGen/BuiltinGCs.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCs.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Casting.h" diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index dc2d38a95f99..c2ced19458ed 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 256a0c95d365..faa5f139cf7b 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" #include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 568b278dd47c..c2037cb7f1ae 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -13,8 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -31,6 +29,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -236,12 +235,12 @@ class TypePromotionTransaction; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, bool isPreheader); - bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); - bool optimizeInst(Instruction *I, bool& ModifiedDT); + bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); + bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy, unsigned AS); bool optimizeInlineAsmInst(CallInst *CS); - bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); bool optimizeExt(Instruction *&I); bool optimizeExtUses(Instruction *I); bool optimizeLoadExt(LoadInst *I); @@ -1662,25 +1661,29 @@ class MemCmpExpansion { BasicBlock *EndBlock; PHINode *PhiRes; bool IsUsedForZeroCmp; + const DataLayout &DL; + int calculateNumBlocks(unsigned Size); void createLoadCmpBlocks(); void createResultBlock(); void setupResultBlockPHINodes(); void setupEndBlockPHINodes(); - void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex, - bool IsLittleEndian); + void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex); + Value *getCompareLoadPairs(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed, IRBuilder<> &Builder); void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, unsigned &NumBytesProcessed); void emitLoadCompareByteBlock(unsigned Index, int GEPIndex); - void emitMemCmpResultBlock(bool IsLittleEndian); - Value *getMemCmpExpansionZeroCase(unsigned Size, bool IsLittleEndian); + void emitMemCmpResultBlock(); + Value *getMemCmpExpansionZeroCase(unsigned Size); + Value *getMemCmpEqZeroOneBlock(unsigned Size); unsigned getLoadSize(unsigned Size); unsigned getNumLoads(unsigned Size); public: - MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize, - unsigned NumLoadsPerBlock); - Value *getMemCmpExpansion(bool IsLittleEndian); + MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, + unsigned NumLoadsPerBlock, const DataLayout &DL); + Value *getMemCmpExpansion(uint64_t Size); }; MemCmpExpansion::ResultBlock::ResultBlock() @@ -1694,39 +1697,41 @@ MemCmpExpansion::ResultBlock::ResultBlock() // return from. // 3. ResultBlock, block to branch to for early exit when a // LoadCmpBlock finds a difference. -MemCmpExpansion::MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize, - unsigned NumLoadsPerBlock) - : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(NumLoadsPerBlock) { +MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, + unsigned MaxLoadSize, unsigned LoadsPerBlock, + const DataLayout &TheDataLayout) + : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock), + DL(TheDataLayout) { + + // A memcmp with zero-comparison with only one block of load and compare does + // not need to set up any extra blocks. This case could be handled in the DAG, + // but since we have all of the machinery to flexibly expand any memcpy here, + // we choose to handle this case too to avoid fragmented lowering. + IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + NumBlocks = calculateNumBlocks(Size); + if (!IsUsedForZeroCmp || NumBlocks != 1) { + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) + setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock. + StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + } IRBuilder<> Builder(CI->getContext()); - - BasicBlock *StartBlock = CI->getParent(); - EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); - setupEndBlockPHINodes(); - IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - - ConstantInt *SizeCast = dyn_cast(CI->getArgOperand(2)); - uint64_t Size = SizeCast->getZExtValue(); - - // Calculate how many load compare blocks are required for an expansion of - // given Size. - NumBlocks = calculateNumBlocks(Size); - createResultBlock(); - - // If return value of memcmp is not used in a zero equality, we need to - // calculate which source was larger. The calculation requires the - // two loaded source values of each load compare block. - // These will be saved in the phi nodes created by setupResultBlockPHINodes. - if (!IsUsedForZeroCmp) - setupResultBlockPHINodes(); - - // Create the number of required load compare basic blocks. - createLoadCmpBlocks(); - - // Update the terminator added by splitBasicBlock to branch to the first - // LoadCmpBlock. Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); } void MemCmpExpansion::createLoadCmpBlocks() { @@ -1743,7 +1748,7 @@ void MemCmpExpansion::createResultBlock() { } // This function creates the IR instructions for loading and comparing 1 byte. -// It loads 1 byte from each source of the memcmp paramters with the given +// It loads 1 byte from each source of the memcmp parameters with the given // GEPIndex. It then subtracts the two loaded values and adds this result to the // final phi node for selecting the memcmp result. void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { @@ -1754,13 +1759,13 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { Builder.SetInsertPoint(LoadCmpBlocks[Index]); Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - // Cast source to LoadSizeType* + // Cast source to LoadSizeType*. if (Source1->getType() != LoadSizeType) Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); if (Source2->getType() != LoadSizeType) Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - // Get the base address using the GEPIndex + // Get the base address using the GEPIndex. if (GEPIndex != 0) { Source1 = Builder.CreateGEP(LoadSizeType, Source1, ConstantInt::get(LoadSizeType, GEPIndex)); @@ -1778,16 +1783,15 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); if (Index < (LoadCmpBlocks.size() - 1)) { - // Early exit branch if difference found to EndBlock, otherwise continue to - // next LoadCmpBlock - + // Early exit branch if difference found to EndBlock. Otherwise, continue to + // next LoadCmpBlock, Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, ConstantInt::get(Diff->getType(), 0)); BranchInst *CmpBr = BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); Builder.Insert(CmpBr); } else { - // The last block has an unconditional branch to EndBlock + // The last block has an unconditional branch to EndBlock. BranchInst *CmpBr = BranchInst::Create(EndBlock); Builder.Insert(CmpBr); } @@ -1801,11 +1805,12 @@ unsigned MemCmpExpansion::getLoadSize(unsigned Size) { return MinAlign(PowerOf2Floor(Size), MaxLoadSize); } -void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( - unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { - - IRBuilder<> Builder(CI->getContext()); - +/// Generate an equality comparison for one or more pairs of loaded values. +/// This is used in the case where the memcmp() call is compared equal or not +/// equal to zero. +Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed, + IRBuilder<> &Builder) { std::vector XorList, OrList; Value *Diff; @@ -1813,8 +1818,13 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); - Builder.SetInsertPoint(LoadCmpBlocks[Index]); + // For a single-block expansion, start inserting before the memcmp call. + if (LoadCmpBlocks.empty()) + Builder.SetInsertPoint(CI); + else + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + Value *Cmp = nullptr; for (unsigned i = 0; i < NumLoads; ++i) { unsigned LoadSize = getLoadSize(RemainingBytes); unsigned GEPIndex = NumBytesProcessed / LoadSize; @@ -1827,13 +1837,13 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); - // Cast source to LoadSizeType* + // Cast source to LoadSizeType*. if (Source1->getType() != LoadSizeType) Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); if (Source2->getType() != LoadSizeType) Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - // Get the base address using the GEPIndex + // Get the base address using the GEPIndex. if (GEPIndex != 0) { Source1 = Builder.CreateGEP(LoadSizeType, Source1, ConstantInt::get(LoadSizeType, GEPIndex)); @@ -1841,16 +1851,23 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( ConstantInt::get(LoadSizeType, GEPIndex)); } - // Load LoadSizeType from the base address + // Load LoadSizeType from the base address. Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + if (NumLoads != 1) { + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + } + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. + Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); + XorList.push_back(Diff); + } else { + // If there's only one load per block, we just compare the loaded values. + Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); } - Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); - Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); - XorList.push_back(Diff); } auto pairWiseOr = [&](std::vector &InList) -> std::vector { @@ -1864,27 +1881,36 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( return OutList; }; - // Pair wise OR the XOR results - OrList = pairWiseOr(XorList); + if (!Cmp) { + // Pairwise OR the XOR results. + OrList = pairWiseOr(XorList); - // Pair wise OR the OR results until one result left - while (OrList.size() != 1) { - OrList = pairWiseOr(OrList); + // Pairwise OR the OR results until one result left. + while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); } - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0], - ConstantInt::get(Diff->getType(), 0)); + return Cmp; +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( + unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { + IRBuilder<> Builder(CI->getContext()); + Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed, Builder); + BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) ? EndBlock : LoadCmpBlocks[Index + 1]; - // Early exit branch if difference found to ResultBlock, otherwise continue to - // next LoadCmpBlock or EndBlock. + // Early exit branch if difference found to ResultBlock. Otherwise, + // continue to next LoadCmpBlock or EndBlock. BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); Builder.Insert(CmpBr); // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes) + // any of the bytes). if (Index == LoadCmpBlocks.size() - 1) { Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); @@ -1901,7 +1927,7 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( // a special case through emitLoadCompareByteBlock. The special handling can // simply subtract the loaded values and add it to the result phi node. void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, - int GEPIndex, bool IsLittleEndian) { + int GEPIndex) { if (LoadSize == 1) { MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); return; @@ -1916,13 +1942,13 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, Value *Source2 = CI->getArgOperand(1); Builder.SetInsertPoint(LoadCmpBlocks[Index]); - // Cast source to LoadSizeType* + // Cast source to LoadSizeType*. if (Source1->getType() != LoadSizeType) Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); if (Source2->getType() != LoadSizeType) Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - // Get the base address using the GEPIndex + // Get the base address using the GEPIndex. if (GEPIndex != 0) { Source1 = Builder.CreateGEP(LoadSizeType, Source1, ConstantInt::get(LoadSizeType, GEPIndex)); @@ -1930,11 +1956,11 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, ConstantInt::get(LoadSizeType, GEPIndex)); } - // Load LoadSizeType from the base address + // Load LoadSizeType from the base address. Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - if (IsLittleEndian) { + if (DL.isLittleEndian()) { Function *F = LoadCmpBlocks[Index]->getParent(); Function *Bswap = Intrinsic::getDeclaration(F->getParent(), @@ -1962,14 +1988,14 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) ? EndBlock : LoadCmpBlocks[Index + 1]; - // Early exit branch if difference found to ResultBlock, otherwise continue to - // next LoadCmpBlock or EndBlock. + // Early exit branch if difference found to ResultBlock. Otherwise, continue + // to next LoadCmpBlock or EndBlock. BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); Builder.Insert(CmpBr); // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 // since early exit to ResultBlock was not taken (no difference was found in - // any of the bytes) + // any of the bytes). if (Index == LoadCmpBlocks.size() - 1) { Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); @@ -1979,7 +2005,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, // This function populates the ResultBlock with a sequence to calculate the // memcmp result. It compares the two loaded source values and returns -1 if // src1 < src2 and 1 if src1 > src2. -void MemCmpExpansion::emitMemCmpResultBlock(bool IsLittleEndian) { +void MemCmpExpansion::emitMemCmpResultBlock() { IRBuilder<> Builder(CI->getContext()); // Special case: if memcmp result is used in a zero equality, result does not @@ -2010,17 +2036,17 @@ void MemCmpExpansion::emitMemCmpResultBlock(bool IsLittleEndian) { int MemCmpExpansion::calculateNumBlocks(unsigned Size) { int NumBlocks = 0; - bool haveOneByteLoad = false; + bool HaveOneByteLoad = false; unsigned RemainingSize = Size; unsigned LoadSize = MaxLoadSize; while (RemainingSize) { if (LoadSize == 1) - haveOneByteLoad = true; + HaveOneByteLoad = true; NumBlocks += RemainingSize / LoadSize; RemainingSize = RemainingSize % LoadSize; LoadSize = LoadSize / 2; } - NumBlocksNonOneByte = haveOneByteLoad ? (NumBlocks - 1) : NumBlocks; + NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks; if (IsUsedForZeroCmp) NumBlocks = NumBlocks / NumLoadsPerBlock + @@ -2046,63 +2072,66 @@ void MemCmpExpansion::setupEndBlockPHINodes() { PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); } -Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size, - bool IsLittleEndian) { +Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { unsigned NumBytesProcessed = 0; - // This loop populates each of the LoadCmpBlocks with IR sequence to handle - // multiple loads per block - for (unsigned i = 0; i < NumBlocks; ++i) { + // This loop populates each of the LoadCmpBlocks with the IR sequence to + // handle multiple loads per block. + for (unsigned i = 0; i < NumBlocks; ++i) emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); - } - emitMemCmpResultBlock(IsLittleEndian); + emitMemCmpResultBlock(); return PhiRes; } +/// A memcmp expansion that compares equality with 0 and only has one block of +/// load and compare can bypass the compare, branch, and phi IR that is required +/// in the general case. +Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { + unsigned NumBytesProcessed = 0; + IRBuilder<> Builder(CI->getContext()); + Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder); + return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); +} + // This function expands the memcmp call into an inline expansion and returns // the memcmp result. -Value *MemCmpExpansion::getMemCmpExpansion(bool IsLittleEndian) { +Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { + if (IsUsedForZeroCmp) + return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : + getMemCmpExpansionZeroCase(Size); - ConstantInt *SizeCast = dyn_cast(CI->getArgOperand(2)); - uint64_t Size = SizeCast->getZExtValue(); - - int LoadSize = MaxLoadSize; - int NumBytesToBeProcessed = Size; - - if (IsUsedForZeroCmp) { - return getMemCmpExpansionZeroCase(Size, IsLittleEndian); - } - - unsigned Index = 0; - // This loop calls emitLoadCompareBlock for comparing SizeVal bytes of the two - // memcmp source. It starts with loading using the maximum load size set by + // This loop calls emitLoadCompareBlock for comparing Size bytes of the two + // memcmp sources. It starts with loading using the maximum load size set by // the target. It processes any remaining bytes using a load size which is the // next smallest power of 2. + int LoadSize = MaxLoadSize; + int NumBytesToBeProcessed = Size; + unsigned Index = 0; while (NumBytesToBeProcessed) { - // Calculate how many blocks we can create with the current load size + // Calculate how many blocks we can create with the current load size. int NumBlocks = NumBytesToBeProcessed / LoadSize; int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; // For each NumBlocks, populate the instruction sequence for loading and - // comparing LoadSize bytes + // comparing LoadSize bytes. while (NumBlocks--) { - emitLoadCompareBlock(Index, LoadSize, GEPIndex, IsLittleEndian); + emitLoadCompareBlock(Index, LoadSize, GEPIndex); Index++; GEPIndex++; } - // Get the next LoadSize to use + // Get the next LoadSize to use. LoadSize = LoadSize / 2; } - emitMemCmpResultBlock(IsLittleEndian); + emitMemCmpResultBlock(); return PhiRes; } // This function checks to see if an expansion of memcmp can be generated. // It checks for constant compare size that is less than the max inline size. // If an expansion cannot occur, returns false to leave as a library call. -// Otherwise, the library call is replaced wtih new IR instruction sequence. +// Otherwise, the library call is replaced with a new IR instruction sequence. /// We want to transform: /// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) /// To: @@ -2177,27 +2206,25 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, NumMemCmpCalls++; IRBuilder<> Builder(CI->getContext()); - // TTI call to check if target would like to expand memcmp and get the - // MaxLoadSize + // TTI call to check if target would like to expand memcmp. Also, get the + // MaxLoadSize. unsigned MaxLoadSize; if (!TTI->expandMemCmp(CI, MaxLoadSize)) return false; - // Early exit from expansion if -Oz - if (CI->getParent()->getParent()->optForMinSize()) { + // Early exit from expansion if -Oz. + if (CI->getFunction()->optForMinSize()) return false; - } - // Early exit from expansion if size is not a constant + // Early exit from expansion if size is not a constant. ConstantInt *SizeCast = dyn_cast(CI->getArgOperand(2)); if (!SizeCast) { NumMemCmpNotConstant++; return false; } - // Early exit from expansion if size greater than max bytes to load + // Early exit from expansion if size greater than max bytes to load. uint64_t SizeVal = SizeCast->getZExtValue(); - unsigned NumLoads = 0; unsigned RemainingSize = SizeVal; unsigned LoadSize = MaxLoadSize; @@ -2207,29 +2234,28 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, LoadSize = LoadSize / 2; } - if (NumLoads > - TLI->getMaxExpandSizeMemcmp(CI->getParent()->getParent()->optForSize())) { + if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { NumMemCmpGreaterThanMax++; return false; } NumMemCmpInlined++; - // MemCmpHelper object, creates and sets up basic blocks required for - // expanding memcmp with size SizeVal + // MemCmpHelper object creates and sets up basic blocks required for + // expanding memcmp with size SizeVal. unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; - MemCmpExpansion MemCmpHelper(CI, MaxLoadSize, NumLoadsPerBlock); + MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL); - Value *Res = MemCmpHelper.getMemCmpExpansion(DL->isLittleEndian()); + Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal); - // Replace call with result of expansion and erarse call. + // Replace call with result of expansion and erase call. CI->replaceAllUsesWith(Res); CI->eraseFromParent(); return true; } -bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { BasicBlock *BB = CI->getParent(); // Lower inline assembly if we can. @@ -2382,12 +2408,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } LibFunc Func; - if (TLInfo->getLibFunc(*CI->getCalledFunction(), Func) && - Func == LibFunc_memcmp) { - if (expandMemCmp(CI, TTI, TLI, DL)) { - ModifiedDT = true; - return true; - } + if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) && + Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) { + ModifiedDT = true; + return true; } return false; } @@ -3934,7 +3958,7 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI) { - const Function *F = CI->getParent()->getParent(); + const Function *F = CI->getFunction(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, ImmutableCallSite(CI)); @@ -4531,7 +4555,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; const TargetRegisterInfo *TRI = - TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); + TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(*DL, TRI, CS); unsigned ArgNo = 0; @@ -6015,7 +6039,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, return true; } -bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. if (InsertedInsts.count(I)) @@ -6170,7 +6194,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL, // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. -bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { SunkAddrs.clear(); bool MadeChange = false; diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 65f58e5686e0..853b9afa1026 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,49 +23,59 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "packets" - #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include using namespace llvm; +#define DEBUG_TYPE "packets" + static cl::opt InstrLimit("dfa-instr-limit", cl::Hidden, cl::init(0), cl::desc("If present, stops packetizing after N instructions")); + static unsigned InstrCount = 0; // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp -namespace { - DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { - return (Inp << DFA_MAX_RESOURCES) | FuncUnits; - } - - /// Return the DFAInput for an instruction class input vector. - /// This function is used in both DFAPacketizer.cpp and in - /// DFAPacketizerEmitter.cpp. - DFAInput getDFAInsnInput(const std::vector &InsnClass) { - DFAInput InsnInput = 0; - assert((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); - for (auto U : InsnClass) - InsnInput = addDFAFuncUnits(InsnInput, U); - return InsnInput; - } +static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; } + +/// Return the DFAInput for an instruction class input vector. +/// This function is used in both DFAPacketizer.cpp and in +/// DFAPacketizerEmitter.cpp. +static DFAInput getDFAInsnInput(const std::vector &InsnClass) { + DFAInput InsnInput = 0; + assert((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; +} + // -------------------------------------------------------------------- DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], const unsigned *SET): - InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), - DFAStateEntryTable(SET) { + InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { // Make sure DFA types are large enough for the number of terms & resources. static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)), @@ -75,7 +85,6 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); } - // Read the DFA transition table and update CachedTable. // // Format of the transition tables: @@ -97,7 +106,6 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } - // Return the DFAInput for an instruction class. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. @@ -112,16 +120,14 @@ DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { return InsnInput; } - // Return the DFAInput for an instruction class input vector. DFAInput DFAPacketizer::getInsnInput(const std::vector &InsnClass) { return getDFAInsnInput(InsnClass); } - // Check if the resources occupied by a MCInstrDesc are available in the // current state. -bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { +bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); @@ -129,10 +135,9 @@ bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { return CachedTable.count(StateTrans) != 0; } - // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. -void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { +void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); @@ -141,24 +146,22 @@ void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { CurrentState = CachedTable[StateTrans]; } - // Check if the resources occupied by a machine instruction are available // in the current state. -bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) { - const llvm::MCInstrDesc &MID = MI.getDesc(); +bool DFAPacketizer::canReserveResources(MachineInstr &MI) { + const MCInstrDesc &MID = MI.getDesc(); return canReserveResources(&MID); } - // Reserve the resources occupied by a machine instruction and change the // current state to reflect that change. -void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) { - const llvm::MCInstrDesc &MID = MI.getDesc(); +void DFAPacketizer::reserveResources(MachineInstr &MI) { + const MCInstrDesc &MID = MI.getDesc(); reserveResources(&MID); } - namespace llvm { + // This class extends ScheduleDAGInstrs and overrides the schedule method // to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { @@ -166,9 +169,11 @@ private: AliasAnalysis *AA; /// Ordered list of DAG postprocessing steps. std::vector> Mutations; + public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA); + // Actual scheduling work. void schedule() override; @@ -176,11 +181,12 @@ public: void addMutation(std::unique_ptr Mutation) { Mutations.push_back(std::move(Mutation)); } + protected: void postprocessDAG(); }; -} +} // end namespace llvm DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, @@ -189,21 +195,18 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, CanHandleTerminators = true; } - /// Apply each ScheduleDAGMutation step in order. void DefaultVLIWScheduler::postprocessDAG() { for (auto &M : Mutations) M->apply(this); } - void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. buildSchedGraph(AA); postprocessDAG(); } - VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, MachineLoopInfo &mli, AliasAnalysis *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { @@ -211,13 +214,11 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } - VLIWPacketizerList::~VLIWPacketizerList() { delete VLIWScheduler; delete ResourceTracker; } - // End the current packet, bundle packet instructions and reset DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) { @@ -237,7 +238,6 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, DEBUG(dbgs() << "End packet\n"); } - // Bundle machine instructions into packets. void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, @@ -336,7 +336,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, VLIWScheduler->finishBlock(); } - // Add a DAG mutation object to the ordered list. void VLIWPacketizerList::addMutation( std::unique_ptr Mutation) { diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 265dda16bfa7..91d18e2bcaa6 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 06ae5cd72c85..2f833260bca2 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 88d422a0f545..324ea171293d 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 27cd639b2a49..4ce86f27a7dd 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp index a40ea28056dd..70dca3b74b2f 100644 --- a/lib/CodeGen/ExpandReductions.cpp +++ b/lib/CodeGen/ExpandReductions.cpp @@ -12,17 +12,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp index 43f364128978..2924b011e0c1 100644 --- a/lib/CodeGen/FaultMaps.cpp +++ b/lib/CodeGen/FaultMaps.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/FaultMaps.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/FaultMaps.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp index 0bdd5e64a7f2..9c71b18619a1 100644 --- a/lib/CodeGen/FuncletLayout.cpp +++ b/lib/CodeGen/FuncletLayout.cpp @@ -11,10 +11,10 @@ // funclets being contiguous. // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" using namespace llvm; #define DEBUG_TYPE "funclet-layout" diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index be21c7306da1..456fa799e8e1 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -11,22 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + using namespace llvm; namespace { class Printer : public FunctionPass { static char ID; + raw_ostream &OS; public: @@ -38,7 +43,8 @@ public: bool runOnFunction(Function &F) override; bool doFinalization(Module &M) override; }; -} + +} // end anonymous namespace INITIALIZE_PASS(GCModuleInfo, "collector-metadata", "Create Garbage Collector Module Metadata", false, false) @@ -48,7 +54,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata", GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) : F(F), S(S), FrameSize(~0LL) {} -GCFunctionInfo::~GCFunctionInfo() {} +GCFunctionInfo::~GCFunctionInfo() = default; // ----------------------------------------------------------------------------- @@ -67,7 +73,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { return *I->second; GCStrategy *S = getGCStrategy(F.getGC()); - Functions.push_back(make_unique(F, *S)); + Functions.push_back(llvm::make_unique(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index d183c7f2980b..bc7beb6f6c2d 100644 --- a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -1,4 +1,4 @@ -//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===// +//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===// // // The LLVM Compiler Infrastructure // @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCMetadataPrinter.h" + using namespace llvm; LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry) -GCMetadataPrinter::GCMetadataPrinter() {} +GCMetadataPrinter::GCMetadataPrinter() = default; -GCMetadataPrinter::~GCMetadataPrinter() {} +GCMetadataPrinter::~GCMetadataPrinter() = default; diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index afc18a15aa1c..dccd8e0706ca 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -16,10 +16,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -784,6 +784,21 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { return false; MIB.addUse(getOrCreateVReg(*Arg)); } + + // Add a MachineMemOperand if it is a target mem intrinsic. + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + TargetLowering::IntrinsicInfo Info; + // TODO: Add a GlobalISel version of getTgtMemIntrinsic. + if (TLI.getTgtMemIntrinsic(Info, CI, ID)) { + MachineMemOperand::Flags Flags = + Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + Flags |= + Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore; + uint64_t Size = Info.memVT.getSizeInBits() >> 3; + MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), + Flags, Size, Info.align)); + } + return true; } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index aec379197dfb..1b50489deeba 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -15,7 +15,6 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 3603f9b7ed93..c6ca49ce24d7 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -553,7 +553,8 @@ bool GlobalMerge::doInitialization(Module &M) { // Grab all non-const globals. for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only - if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) + if (GV.isDeclaration() || GV.isThreadLocal() || + GV.hasSection() || GV.hasImplicitSection()) continue; // It's not safe to merge globals that may be preempted diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 1c33f3b6800e..c98c9b68ac0e 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" @@ -25,6 +24,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 444416a77008..b831ddfa601a 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -31,21 +31,21 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/FaultMaps.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index b7ab404070b1..4e6a3ec21866 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -857,21 +857,46 @@ void InlineSpiller::insertReload(unsigned NewVReg, ++NumReloads; } +/// Check if \p Def fully defines a VReg with an undefined value. +/// If that's the case, that means the value of VReg is actually +/// not relevant. +static bool isFullUndefDef(const MachineInstr &Def) { + if (!Def.isImplicitDef()) + return false; + assert(Def.getNumOperands() == 1 && + "Implicit def with more than one definition"); + // We can say that the VReg defined by Def is undef, only if it is + // fully defined by Def. Otherwise, some of the lanes may not be + // undef and the value of the VReg matters. + return !Def.getOperand(0).getSubReg(); +} + /// insertSpill - Insert a spill of NewVReg after MI. void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); MachineInstrSpan MIS(MI); - TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI); + bool IsRealSpill = true; + if (isFullUndefDef(*MI)) { + // Don't spill undef value. + // Anything works for undef, in particular keeping the memory + // uninitialized is a viable option and it saves code size and + // run time. + BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL)) + .addReg(NewVReg, getKillRegState(isKill)); + IsRealSpill = false; + } else + TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, "spill")); ++NumSpills; - HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); + if (IsRealSpill) + HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); } /// spillAroundUses - insert spill code around each use of Reg. diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index be3b258315bb..f2defb4fd623 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/BasicTTIImpl.h" @@ -31,21 +30,11 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; -// Enable or disable FastISel. Both options are needed, because -// FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -O0. -static cl::opt -EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the \"fast\" instruction selector")); - -static cl::opt - EnableGlobalISel("global-isel", cl::Hidden, - cl::desc("Enable the \"global\" instruction selector")); - void LLVMTargetMachine::initAsmInfo() { MRI = TheTarget.createMCRegInfo(getTargetTriple().str()); MII = TheTarget.createMCInstrInfo(); @@ -71,8 +60,7 @@ void LLVMTargetMachine::initAsmInfo() { TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments); - if (Options.CompressDebugSections) - TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu); + TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections); TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations); @@ -106,9 +94,7 @@ static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, AnalysisID StopBefore, - AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer = nullptr) { - + AnalysisID StopAfter) { // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); @@ -117,99 +103,22 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); PM.add(PassConfig); - - // When in emulated TLS mode, add the LowerEmuTLS pass. - if (TM->Options.EmulatedTLS) - PM.add(createLowerEmuTLSPass()); - - PM.add(createPreISelIntrinsicLoweringPass()); - - // Add internal analysis passes from the target machine. - PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); - - PassConfig->addIRPasses(); - - PassConfig->addCodeGenPrepare(); - - PassConfig->addPassesToHandleExceptions(); - - PassConfig->addISelPrepare(); - MachineModuleInfo *MMI = new MachineModuleInfo(TM); - MMI->setMachineFunctionInitializer(MFInitializer); PM.add(MMI); - // Enable FastISel with -fast, but allow that to be overridden. - TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); - if (EnableFastISelOption == cl::BOU_TRUE || - (TM->getOptLevel() == CodeGenOpt::None && - TM->getO0WantsFastISel())) - TM->setFastISel(true); - - // Ask the target for an isel. - // Enable GlobalISel if the target wants to, but allow that to be overriden. - if (EnableGlobalISel == cl::BOU_TRUE || (EnableGlobalISel == cl::BOU_UNSET && - PassConfig->isGlobalISelEnabled())) { - if (PassConfig->addIRTranslator()) - return nullptr; - - PassConfig->addPreLegalizeMachineIR(); - - if (PassConfig->addLegalizeMachineIR()) - return nullptr; - - // Before running the register bank selector, ask the target if it - // wants to run some passes. - PassConfig->addPreRegBankSelect(); - - if (PassConfig->addRegBankSelect()) - return nullptr; - - PassConfig->addPreGlobalInstructionSelect(); - - if (PassConfig->addGlobalInstructionSelect()) - return nullptr; - - // Pass to reset the MachineFunction if the ISel failed. - PM.add(createResetMachineFunctionPass( - PassConfig->reportDiagnosticWhenGlobalISelFallback(), - PassConfig->isGlobalISelAbortEnabled())); - - // Provide a fallback path when we do not want to abort on - // not-yet-supported input. - if (!PassConfig->isGlobalISelAbortEnabled() && - PassConfig->addInstSelector()) - return nullptr; - - } else if (PassConfig->addInstSelector()) + if (PassConfig->addISelPasses()) return nullptr; - PassConfig->addMachinePasses(); - PassConfig->setInitialized(); return &MMI->getContext(); } -bool LLVMTargetMachine::addPassesToEmitFile( - PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopBefore, AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer) { - // Add common CodeGen passes. - MCContext *Context = - addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, - StopBefore, StopAfter, MFInitializer); - if (!Context) - return true; - - if (StopBefore || StopAfter) { - PM.add(createPrintMIRPass(Out)); - return false; - } - +bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, + raw_pwrite_stream &Out, CodeGenFileType FileType, + MCContext &Context) { if (Options.MCOptions.MCSaveTempLabels) - Context->setAllowTemporaryLabels(false); + Context.setAllowTemporaryLabels(false); const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); @@ -226,14 +135,14 @@ bool LLVMTargetMachine::addPassesToEmitFile( // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = nullptr; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, Options.MCOptions); auto FOut = llvm::make_unique(Out); MCStreamer *S = getTarget().createAsmStreamer( - *Context, std::move(FOut), Options.MCOptions.AsmVerbose, + Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); @@ -242,7 +151,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, Options.MCOptions); @@ -250,11 +159,11 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; // Don't waste memory on names of temp labels. - Context->setUseNamesOnTempLabels(false); + Context.setUseNamesOnTempLabels(false); Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( - T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; @@ -262,7 +171,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( case CGFT_Null: // The Null output is intended for use for performance analysis and testing, // not real users. - AsmStreamer.reset(getTarget().createNullStreamer(*Context)); + AsmStreamer.reset(getTarget().createNullStreamer(Context)); break; } @@ -273,8 +182,28 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; PM.add(Printer); - PM.add(createFreeMachineFunctionPass()); + return false; +} +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopBefore, AnalysisID StopAfter) { + // Add common CodeGen passes. + MCContext *Context = + addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, + StopBefore, StopAfter); + if (!Context) + return true; + + if (StopBefore || StopAfter) { + PM.add(createPrintMIRPass(Out)); + } else { + if (addAsmPrinter(PM, Out, FileType, *Context)) + return true; + } + + PM.add(createFreeMachineFunctionPass()); return false; } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 40ee7ea785f0..995c58a63564 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 0c05dbeacba0..471dcea4bb39 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -14,15 +14,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index b4aa0dc326a5..b3248e53d0a5 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 882de1a3fad9..60033db38ee4 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LiveRegMatrix.h" #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Pass.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 17cab0ae910e..b109f1922a3e 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -23,6 +22,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index edba749b5fce..3e9513111bf4 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -16,8 +16,8 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index 1d36ff4e1458..f58d1f8b83ae 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -11,11 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "MIParser.h" - #include "MILexer.h" +#include "MIParser.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MIRPrinter.h" @@ -26,19 +34,48 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include #include +#include +#include +#include +#include +#include using namespace llvm; @@ -2039,7 +2076,7 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { // The token was already consumed, so use return here instead of break. return false; } - case MIToken::kw_call_entry: { + case MIToken::kw_call_entry: lex(); switch (Token.kind()) { case MIToken::GlobalValue: @@ -2059,7 +2096,6 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { "expected a global value or an external symbol after 'call-entry'"); } break; - } default: llvm_unreachable("The current token should be pseudo source value"); } diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h index 9b3879cf8377..2307881068ef 100644 --- a/lib/CodeGen/MIRParser/MIParser.h +++ b/lib/CodeGen/MIRParser/MIParser.h @@ -1,4 +1,4 @@ -//===- MIParser.h - Machine Instructions Parser ---------------------------===// +//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,21 +15,19 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Allocator.h" namespace llvm { -class StringRef; -class BasicBlock; class MachineBasicBlock; class MachineFunction; -class MachineInstr; -class MachineRegisterInfo; class MDNode; class RegisterBank; struct SlotMapping; class SMDiagnostic; class SourceMgr; +class StringRef; class TargetRegisterClass; struct VRegInfo { @@ -45,8 +43,8 @@ struct VRegInfo { unsigned PreferredReg = 0; }; -typedef StringMap Name2RegClassMap; -typedef StringMap Name2RegBankMap; +using Name2RegClassMap = StringMap; +using Name2RegBankMap = StringMap; struct PerFunctionMIParsingState { BumpPtrAllocator Allocator; @@ -122,4 +120,4 @@ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index ff12297e3fc6..78b57f357781 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -50,18 +50,24 @@ namespace llvm { /// file. class MIRParserImpl { SourceMgr SM; + yaml::Input In; StringRef Filename; LLVMContext &Context; - StringMap> Functions; SlotMapping IRSlots; /// Maps from register class names to register classes. Name2RegClassMap Names2RegClasses; /// Maps from register bank names to register banks. Name2RegBankMap Names2RegBanks; + /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are + /// created and inserted into the given module when this is true. + bool NoLLVMIR = false; + /// True when a well formed MIR file does not contain any MIR/machine function + /// parts. + bool NoMIRDocuments = false; public: - MIRParserImpl(std::unique_ptr Contents, StringRef Filename, - LLVMContext &Context); + MIRParserImpl(std::unique_ptr Contents, + StringRef Filename, LLVMContext &Context); void reportDiagnostic(const SMDiagnostic &Diag); @@ -85,22 +91,22 @@ public: /// file. /// /// Return null if an error occurred. - std::unique_ptr parse(); + std::unique_ptr parseIRModule(); + + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); /// Parse the machine function in the current YAML document. /// - /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR. - /// A dummy IR function is created and inserted into the given module when - /// this parameter is true. /// /// Return true if an error occurred. - bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR); + bool parseMachineFunction(Module &M, MachineModuleInfo &MMI); /// Initialize the machine function to the state that's described in the MIR /// file. /// /// Return true if error occurred. - bool initializeMachineFunction(MachineFunction &MF); + bool initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF); bool parseRegisterInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); @@ -144,9 +150,6 @@ private: SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error, SMRange SourceRange); - /// Create an empty function with the given name. - void createDummyFunction(StringRef Name, Module &M); - void initNames2RegClasses(const MachineFunction &MF); void initNames2RegBanks(const MachineFunction &MF); @@ -166,10 +169,19 @@ private: } // end namespace llvm +static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { + reinterpret_cast(Context)->reportDiagnostic(Diag); +} + MIRParserImpl::MIRParserImpl(std::unique_ptr Contents, StringRef Filename, LLVMContext &Context) - : SM(), Filename(Filename), Context(Context) { - SM.AddNewSourceBuffer(std::move(Contents), SMLoc()); + : SM(), + In(SM.getMemoryBuffer( + SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(), + nullptr, handleYAMLDiag, this), + Filename(Filename), + Context(Context) { + In.setContext(&In); } bool MIRParserImpl::error(const Twine &Message) { @@ -206,24 +218,16 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); } -static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { - reinterpret_cast(Context)->reportDiagnostic(Diag); -} - -std::unique_ptr MIRParserImpl::parse() { - yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(), - /*Ctxt=*/nullptr, handleYAMLDiag, this); - In.setContext(&In); - +std::unique_ptr MIRParserImpl::parseIRModule() { if (!In.setCurrentDocument()) { if (In.error()) return nullptr; // Create an empty module when the MIR file is empty. + NoMIRDocuments = true; return llvm::make_unique(Filename, Context); } std::unique_ptr M; - bool NoLLVMIR = false; // Parse the block scalar manually so that we can return unique pointer // without having to go trough YAML traits. if (const auto *BSN = @@ -237,49 +241,68 @@ std::unique_ptr MIRParserImpl::parse() { } In.nextDocument(); if (!In.setCurrentDocument()) - return M; + NoMIRDocuments = true; } else { // Create an new, empty module. M = llvm::make_unique(Filename, Context); NoLLVMIR = true; } - - // Parse the machine functions. - do { - if (parseMachineFunction(In, *M, NoLLVMIR)) - return nullptr; - In.nextDocument(); - } while (In.setCurrentDocument()); - return M; } -bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M, - bool NoLLVMIR) { - auto MF = llvm::make_unique(); - yaml::EmptyContext Ctx; - yaml::yamlize(In, *MF, false, Ctx); - if (In.error()) - return true; - auto FunctionName = MF->Name; - if (Functions.find(FunctionName) != Functions.end()) - return error(Twine("redefinition of machine function '") + FunctionName + - "'"); - Functions.insert(std::make_pair(FunctionName, std::move(MF))); - if (NoLLVMIR) - createDummyFunction(FunctionName, M); - else if (!M.getFunction(FunctionName)) - return error(Twine("function '") + FunctionName + - "' isn't defined in the provided LLVM IR"); +bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + if (NoMIRDocuments) + return false; + + // Parse the machine functions. + do { + if (parseMachineFunction(M, MMI)) + return true; + In.nextDocument(); + } while (In.setCurrentDocument()); + return false; } -void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { +/// Create an empty function with the given name. +static Function *createDummyFunction(StringRef Name, Module &M) { auto &Context = M.getContext(); Function *F = cast(M.getOrInsertFunction( Name, FunctionType::get(Type::getVoidTy(Context), false))); BasicBlock *BB = BasicBlock::Create(Context, "entry", F); new UnreachableInst(Context, BB); + return F; +} + +bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { + // Parse the yaml. + yaml::MachineFunction YamlMF; + yaml::EmptyContext Ctx; + yaml::yamlize(In, YamlMF, false, Ctx); + if (In.error()) + return true; + + // Search for the corresponding IR function. + StringRef FunctionName = YamlMF.Name; + Function *F = M.getFunction(FunctionName); + if (!F) { + if (NoLLVMIR) { + F = createDummyFunction(FunctionName, M); + } else { + return error(Twine("function '") + FunctionName + + "' isn't defined in the provided LLVM IR"); + } + } + if (MMI.getMachineFunction(*F) != nullptr) + return error(Twine("redefinition of machine function '") + FunctionName + + "'"); + + // Create the MachineFunction. + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + if (initializeMachineFunction(YamlMF, MF)) + return true; + + return false; } static bool isSSA(const MachineFunction &MF) { @@ -319,15 +342,12 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { Properties.set(MachineFunctionProperties::Property::NoVRegs); } -bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { - auto It = Functions.find(MF.getName()); - if (It == Functions.end()) - return error(Twine("no machine function information for function '") + - MF.getName() + "' in the MIR file"); +bool +MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF) { // TODO: Recreate the machine function. initNames2RegClasses(MF); initNames2RegBanks(MF); - const yaml::MachineFunction &YamlMF = *It->getValue(); if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); @@ -838,16 +858,18 @@ MIRParser::MIRParser(std::unique_ptr Impl) MIRParser::~MIRParser() {} -std::unique_ptr MIRParser::parseLLVMModule() { return Impl->parse(); } +std::unique_ptr MIRParser::parseIRModule() { + return Impl->parseIRModule(); +} -bool MIRParser::initializeMachineFunction(MachineFunction &MF) { - return Impl->initializeMachineFunction(MF); +bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + return Impl->parseMachineFunctions(M, MMI); } std::unique_ptr llvm::createMIRParserFromFile(StringRef Filename, SMDiagnostic &Error, LLVMContext &Context) { - auto FileOrErr = MemoryBuffer::getFile(Filename); + auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { Error = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + EC.message()); diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 293fc7358b8e..c524a9835f33 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -12,35 +12,65 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MIRPrinter.h" - -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Options.h" -#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/YAMLTraits.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -147,6 +177,7 @@ template <> struct BlockScalarTraits { static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) { Mod.print(OS, nullptr); } + static StringRef input(StringRef Str, void *Ctxt, Module &Mod) { llvm_unreachable("LLVM Module is supposed to be parsed separately"); return ""; @@ -210,6 +241,8 @@ void MIRPrinter::print(const MachineFunction &MF) { } StrOS.flush(); yaml::Output Out(OS); + if (!SimplifyMIR) + Out.setWriteDefaultValues(true); Out << YamlMF; } @@ -516,7 +549,6 @@ bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const { return std::equal(MBB.succ_begin(), MBB.succ_end(), GuessedSuccs.begin()); } - void MIPrinter::print(const MachineBasicBlock &MBB) { assert(MBB.getNumber() >= 0 && "Invalid MBB number"); OS << "bb." << MBB.getNumber(); @@ -908,7 +940,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << "%const." << Op.getIndex(); printOffset(Op.getOffset()); break; - case MachineOperand::MO_TargetIndex: { + case MachineOperand::MO_TargetIndex: OS << "target-index("; if (const auto *Name = getTargetIndexName( *Op.getParent()->getParent()->getParent(), Op.getIndex())) @@ -918,15 +950,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << ')'; printOffset(Op.getOffset()); break; - } case MachineOperand::MO_JumpTableIndex: OS << "%jump-table." << Op.getIndex(); break; - case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_ExternalSymbol: { + StringRef Name = Op.getSymbolName(); OS << '$'; - printLLVMNameWithoutPrefix(OS, Op.getSymbolName()); + if (Name.empty()) { + OS << "\"\""; + } else { + printLLVMNameWithoutPrefix(OS, Name); + } printOffset(Op.getOffset()); break; + } case MachineOperand::MO_GlobalAddress: Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); printOffset(Op.getOffset()); diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp index 671cf1eddc2d..09354cf70c3c 100644 --- a/lib/CodeGen/MIRPrintingPass.cpp +++ b/lib/CodeGen/MIRPrintingPass.cpp @@ -14,9 +14,9 @@ #include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index c1ca8e8e83b4..fc52b0da0d61 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -25,8 +25,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "BranchFolding.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,7 +39,9 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 34f6bbd59e9b..582ff139f886 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SmallSet.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index f83b5481e0a5..7d5a68192e6b 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -19,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp index acb7c4810b16..28ecc8f96805 100644 --- a/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/lib/CodeGen/MachineDominanceFrontier.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" - using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index e3a6c51c47ad..65e9e5d195a4 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index ac4ccb81b884..bbdae6e1a49e 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -52,8 +51,6 @@ static cl::opt cl::desc("Force the alignment of all functions."), cl::init(0), cl::Hidden); -void MachineFunctionInitializer::anchor() {} - static const char *getPropertyName(MachineFunctionProperties::Property Prop) { typedef MachineFunctionProperties::Property P; switch(Prop) { diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 2265676ff8b1..5ffe33006131 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -42,7 +42,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { return false; MachineModuleInfo &MMI = getAnalysis(); - MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(F); MachineFunctionProperties &MFProps = MF.getProperties(); diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0d533c3f4f23..55d9defced3a 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 306b75dbbae7..2a6cb07dbd2d 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -11,20 +11,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -47,8 +47,8 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 95c62d820b0e..52d5819f8dbc 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index c1b72430e605..825290a438a6 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -7,14 +7,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" @@ -259,7 +258,14 @@ void MachineModuleInfo::addPersonality(const Function *Personality) { /// \} -MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { +MachineFunction * +MachineModuleInfo::getMachineFunction(const Function &F) const { + auto I = MachineFunctions.find(&F); + return I != MachineFunctions.end() ? I->second.get() : nullptr; +} + +MachineFunction & +MachineModuleInfo::getOrCreateMachineFunction(const Function &F) { // Shortcut for the common case where a sequence of MachineFunctionPasses // all query for the same Function. if (LastRequest == &F) @@ -273,10 +279,6 @@ MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { MF = new MachineFunction(&F, TM, NextFnNum++, *this); // Update the set entry. I.first->second.reset(MF); - - if (MFInitializer) - if (MFInitializer->initializeMachineFunction(*MF)) - report_fatal_error("Unable to initialize machine function"); } else { MF = I.first->second.get(); } diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 9ea3c00a2fc4..fd6b2427891d 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -1111,7 +1111,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, Builder.CreateRetVoid(); MachineModuleInfo &MMI = getAnalysis(); - MachineFunction &MF = MMI.getMachineFunction(*F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); @@ -1207,7 +1207,7 @@ bool MachineOutliner::runOnModule(Module &M) { return false; MachineModuleInfo &MMI = getAnalysis(); - const TargetSubtargetInfo &STI = MMI.getMachineFunction(*M.begin()) + const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin()) .getSubtarget(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); @@ -1216,7 +1216,7 @@ bool MachineOutliner::runOnModule(Module &M) { // Build instruction mappings for each function in the module. for (Function &F : M) { - MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(F); // Is the function empty? Safe to outline from? if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF)) diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 8f5ac8b3fc45..19e9a50e2c43 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -61,7 +61,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SetVector.h" @@ -69,6 +68,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp index 2402ffdbbcb1..1e74104e89ed 100644 --- a/lib/CodeGen/MachineRegionInfo.cpp +++ b/lib/CodeGen/MachineRegionInfo.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineRegionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/MachineRegionInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 128910f8eb2a..9a92ee279cdc 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -18,7 +19,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index edc3783afa2f..01a2286b8d66 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/PriorityQueue.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -30,12 +31,11 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" -#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 7c34e71a0cce..79e3fea3f90c 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SparseBitVector.h" @@ -33,6 +32,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 01391a1a0e50..6c5abc66fba1 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" @@ -21,7 +22,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index f6dbf667cf02..e65c256c1bb5 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,7 +23,6 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" @@ -36,6 +35,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/InlineAsm.h" @@ -945,7 +945,6 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset); // TODO: verify we have properly encoded deopt arguments - }; } @@ -1947,9 +1946,11 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); - // All predecessors must have a live-out value if this is not a - // subregister liverange. - if (!PVNI && LaneMask.none()) { + // All predecessors must have a live-out value. However for a phi + // instruction with subregister intervals + // only one of the subregisters (not necessarily the current one) needs to + // be defined. + if (!PVNI && (LaneMask.none() || !IsPHI) ) { report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 76ad668104b4..f7aeb4204c5b 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index 00e72971a01e..513e82716564 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 6d643457e9a9..da8fac6d3834 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -66,7 +66,6 @@ // C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -79,6 +78,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 5bc5f7524dbf..425a59dc0375 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -27,9 +27,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index fb49a934431c..a7b7a9f8ab15 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -21,13 +21,12 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 24be7ea98d82..774306154a89 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "LiveDebugVariables.h" #include "RegAllocBase.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/PassAnalysisSupport.h" diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 47d726f6da7a..50d241bff23d 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===// // // The LLVM Compiler Infrastructure // @@ -19,36 +19,63 @@ #include "SpillPlacement.h" #include "Spiller.h" #include "SplitKit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/PassAnalysisSupport.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include #include +#include +#include using namespace llvm; @@ -106,13 +133,14 @@ static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); namespace { + class RAGreedy : public MachineFunctionPass, public RegAllocBase, private LiveRangeEdit::Delegate { // Convenient shortcuts. - typedef std::priority_queue > PQueue; - typedef SmallPtrSet SmallLISet; - typedef SmallSet SmallVirtRegSet; + using PQueue = std::priority_queue>; + using SmallLISet = SmallPtrSet; + using SmallVirtRegSet = SmallSet; // context MachineFunction *MF; @@ -201,12 +229,12 @@ class RAGreedy : public MachineFunctionPass, // RegInfo - Keep additional information about each live range. struct RegInfo { - LiveRangeStage Stage; + LiveRangeStage Stage = RS_New; // Cascade - Eviction loop prevention. See canEvictInterference(). - unsigned Cascade; + unsigned Cascade = 0; - RegInfo() : Stage(RS_New), Cascade(0) {} + RegInfo() = default; }; IndexedMap ExtraRegInfo; @@ -232,10 +260,10 @@ class RAGreedy : public MachineFunctionPass, /// Cost of evicting interference. struct EvictionCost { - unsigned BrokenHints; ///< Total number of broken hints. - float MaxWeight; ///< Maximum spill weight evicted. + unsigned BrokenHints = 0; ///< Total number of broken hints. + float MaxWeight = 0; ///< Maximum spill weight evicted. - EvictionCost(): BrokenHints(0), MaxWeight(0) {} + EvictionCost() = default; bool isMax() const { return BrokenHints == ~0u; } @@ -413,10 +441,12 @@ private: /// Its currently assigned register. /// In case of a physical register Reg == PhysReg. unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} }; - typedef SmallVector HintsInfo; + using HintsInfo = SmallVector; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); void collectHintInfo(unsigned, HintsInfo &); @@ -436,6 +466,7 @@ private: } } }; + } // end anonymous namespace char RAGreedy::ID = 0; @@ -475,7 +506,6 @@ const char *const RAGreedy::StageName[] = { // This helps stabilize decisions based on float comparisons. const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 - FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } @@ -511,7 +541,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } - //===----------------------------------------------------------------------===// // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// @@ -634,7 +663,6 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { return LI; } - //===----------------------------------------------------------------------===// // Direct Assignment //===----------------------------------------------------------------------===// @@ -682,7 +710,6 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, return CheapReg ? CheapReg : PhysReg; } - //===----------------------------------------------------------------------===// // Interference eviction //===----------------------------------------------------------------------===// @@ -954,7 +981,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, return BestPhys; } - //===----------------------------------------------------------------------===// // Region Splitting //===----------------------------------------------------------------------===// @@ -1025,7 +1051,6 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, return SpillPlacer->scanActiveBundles(); } - /// addThroughConstraints - Add constraints and links to SpillPlacer from the /// live-through blocks in Blocks. void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, @@ -1083,7 +1108,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { unsigned Visited = 0; #endif - for (;;) { + while (true) { ArrayRef NewBundles = SpillPlacer->getRecentPositive(); // Find new through blocks in the periphery of PrefRegBundles. for (int i = 0, e = NewBundles.size(); i != e; ++i) { @@ -1197,8 +1222,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; unsigned Ins = 0; if (BI.LiveIn) @@ -1211,8 +1236,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { unsigned Number = Cand.ActiveBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, false)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, true)]; if (!RegIn && !RegOut) continue; if (RegIn && RegOut) { @@ -1264,7 +1289,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned IntvIn = 0, IntvOut = 0; SlotIndex IntfIn, IntfOut; if (BI.LiveIn) { - unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; if (CandIn != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandIn]; IntvIn = Cand.IntvIdx; @@ -1273,7 +1298,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } } if (BI.LiveOut) { - unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; if (CandOut != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandOut]; IntvOut = Cand.IntvIdx; @@ -1313,7 +1338,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned IntvIn = 0, IntvOut = 0; SlotIndex IntfIn, IntfOut; - unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; if (CandIn != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandIn]; IntvIn = Cand.IntvIdx; @@ -1321,7 +1346,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, IntfIn = Cand.Intf.first(); } - unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; if (CandOut != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandOut]; IntvOut = Cand.IntvIdx; @@ -1533,7 +1558,6 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, return 0; } - //===----------------------------------------------------------------------===// // Per-Block Splitting //===----------------------------------------------------------------------===// @@ -1580,7 +1604,6 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } - //===----------------------------------------------------------------------===// // Per-Instruction Splitting //===----------------------------------------------------------------------===// @@ -1664,12 +1687,10 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } - //===----------------------------------------------------------------------===// // Local Splitting //===----------------------------------------------------------------------===// - /// calcGapWeights - Compute the maximum spill weight that needs to be evicted /// in order to use PhysReg between two entries in SA->UseSlots. /// @@ -1740,7 +1761,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; for (; Gap != NumGaps; ++Gap) { - GapWeight[Gap] = llvm::huge_valf; + GapWeight[Gap] = huge_valf; if (Uses[Gap+1].getBaseIndex() >= I->end) break; } @@ -1846,7 +1867,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) - GapWeight[RegMaskGaps[i]] = llvm::huge_valf; + GapWeight[RegMaskGaps[i]] = huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1858,7 +1879,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // It is the spill weight that needs to be evicted. float MaxGap = GapWeight[0]; - for (;;) { + while (true) { // Live before/after split? const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; @@ -1881,7 +1902,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Legally, without causing looping? bool Legal = !ProgressRequired || NewGaps < NumGaps; - if (Legal && MaxGap < llvm::huge_valf) { + if (Legal && MaxGap < huge_valf) { // Estimate the new spill weight. Each instruction reads or writes the // register. Conservatively assume there are no read-modify-write // instructions. @@ -2680,6 +2701,7 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, if (Reloads || FoldedReloads || Spills || FoldedSpills) { using namespace ore; + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", L->getStartLoc(), L->getHeader()); if (Spills) diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index b2dfef91add5..e3baff4be4bc 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -29,15 +29,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -56,7 +57,6 @@ #include "llvm/CodeGen/PBQP/Math.h" #include "llvm/CodeGen/PBQP/Solution.h" #include "llvm/CodeGen/PBQPRAConstraint.h" -#include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -84,8 +84,8 @@ #include #include #include -#include #include +#include using namespace llvm; @@ -738,7 +738,15 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, if (PReg == 0) { const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); - PReg = RC.getRawAllocationOrder(MF).front(); + const ArrayRef RawPRegOrder = RC.getRawAllocationOrder(MF); + for (unsigned CandidateReg : RawPRegOrder) { + if (!VRM.getRegInfo().isReserved(CandidateReg)) { + PReg = CandidateReg; + break; + } + } + assert(PReg && + "No un-reserved physical registers in this register class"); } VRM.assignVirt2Phys(LI.reg, PReg); diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 82a3bd9a0bd1..956dec39fc38 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index c726edc88b41..88e0a3b58940 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -12,9 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -24,7 +25,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index 66f196678dea..d7a3ac080823 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -12,11 +12,22 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include using namespace llvm; @@ -63,7 +74,7 @@ PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) { void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { const TargetRegisterInfo *TRI; - typedef std::pair> FuncPtrRegMaskPair; + using FuncPtrRegMaskPair = std::pair>; SmallVector FPRMPairVector; diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index cc32e43968bb..d2eff950d861 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -32,10 +32,10 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; @@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, const SmallVectorImpl &SubRangeInfos, const SmallVectorImpl &Intervals) const { const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = Intervals[0]->reg;; + unsigned Reg = Intervals[0]->reg; for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ) { MachineOperand &MO = *I++; @@ -243,6 +243,11 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, unsigned VReg = Intervals[ID]->reg; MO.setReg(VReg); + if (MO.isTied()) { + /// Undef use operands are not tracked in the equivalence class but need + /// to be update if they are tied. + MO.getParent()->substituteRegister(Reg, VReg, 0, TRI); + } } // TODO: We could attempt to recompute new register classes while visiting // the operands: Some of the split register may be fine with less constraint diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp index 3e259927ac5c..01b3db43b283 100644 --- a/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index dc72ac073258..3cd270cec3a6 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 3fdbd2459361..7dd66d799be4 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -12,19 +12,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/IntEqClasses.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -33,7 +34,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/Constants.h" diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index ca2881cb91e0..bb6a45996f63 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fb51a4eb1421..a0967f574006 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1028,13 +1028,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { switch (Opc) { default: break; case ISD::AssertSext: - return DAG.getNode(ISD::AssertSext, DL, PVT, - SExtPromoteOperand(Op.getOperand(0), PVT), - Op.getOperand(1)); + if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1)); + break; case ISD::AssertZext: - return DAG.getNode(ISD::AssertZext, DL, PVT, - ZExtPromoteOperand(Op.getOperand(0), PVT), - Op.getOperand(1)); + if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1)); + break; case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -1563,7 +1563,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. - if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12488,12 +12488,18 @@ void DAGCombiner::getStoreMergeCandidates( if (BasePtr.Base.isUndef()) return; - bool IsLoadSrc = isa(St->getValue()); bool IsConstantSrc = isa(St->getValue()) || isa(St->getValue()); bool IsExtractVecSrc = (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + bool IsLoadSrc = isa(St->getValue()); + BaseIndexOffset LBasePtr; + // Match on loadbaseptr if relevant. + if (IsLoadSrc) + LBasePtr = BaseIndexOffset::match( + cast(St->getValue())->getBasePtr(), DAG); + auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; @@ -12502,9 +12508,15 @@ void DAGCombiner::getStoreMergeCandidates( if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) && isa(Other->getValue()))) return false; - if (IsLoadSrc) - if (!isa(Other->getValue())) + if (IsLoadSrc) { + // The Load's Base Ptr must also match + if (LoadSDNode *OtherLd = dyn_cast(Other->getValue())) { + auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); + if (!(LBasePtr.equalBaseIndex(LPtr))) + return false; + } else return false; + } if (IsConstantSrc) if (!(isa(Other->getValue()) || isa(Other->getValue()))) diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 5003b79974eb..b2599b2e17f1 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" @@ -50,7 +51,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index da2fb72bec45..e54eaa3b81be 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4598,6 +4598,14 @@ void SelectionDAG::Legalize() { AssignTopologicalOrder(); SmallPtrSet LegalizedNodes; + // Use a delete listener to remove nodes which were deleted during + // legalization from LegalizeNodes. This is needed to handle the situation + // where a new node is allocated by the object pool to the same address of a + // previously deleted node. + DAGNodeDeletedListener DeleteListener( + *this, + [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); }); + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); // Visit all the nodes. We start in topological order, so that we see diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index d80a281279b6..137994093277 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SchedulerRegistry.h" #include "InstrEmitter.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 579112c9bfc8..593efc5121f9 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -15,13 +15,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index eee4a4b06718..631cb34717c4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -18,12 +18,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 80a03ea4eea0..dff8bd2ad37d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -19,9 +20,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ValueTracking.h" @@ -33,7 +34,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" @@ -589,6 +589,11 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes) { // worklist. while (!DeadNodes.empty()) { SDNode *N = DeadNodes.pop_back_val(); + // Skip to next node if we've already managed to delete the node. This could + // happen if replacing a node causes a node previously added to the node to + // be deleted. + if (N->getOpcode() == ISD::DELETED_NODE) + continue; for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) DUL->NodeDeleted(N, nullptr); @@ -2661,7 +2666,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (DemandedElts[EltIdx]) { computeKnownBits(InVal, Known2, Depth + 1); Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());; + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } // If we demand the source vector then add its common known bits, ensuring @@ -2677,7 +2682,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, computeKnownBits(InVec, Known, Depth + 1); computeKnownBits(InVal, Known2, Depth + 1); Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());; + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } break; } @@ -3883,7 +3888,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // fold (add Sym, c) -> Sym+c if (GlobalAddressSDNode *GA = dyn_cast(Cst1)) return FoldSymbolOffset(Opcode, VT, GA, Cst2); - if (isCommutativeBinOp(Opcode)) + if (TLI->isCommutativeBinOp(Opcode)) if (GlobalAddressSDNode *GA = dyn_cast(Cst2)) return FoldSymbolOffset(Opcode, VT, GA, Cst1); @@ -4029,7 +4034,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ConstantFPSDNode *N2CFP = dyn_cast(N2); // Canonicalize constant to RHS if commutative. - if (isCommutativeBinOp(Opcode)) { + if (TLI->isCommutativeBinOp(Opcode)) { if (N1C && !N2C) { std::swap(N1C, N2C); std::swap(N1, N2); @@ -4413,7 +4418,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { - if (isCommutativeBinOp(Opcode)) { + if (TLI->isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { switch (Opcode) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b895da21a7ff..d34ac40b9496 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -101,7 +101,8 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V); + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -111,10 +112,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional AssertOp = None) { + Optional AssertOp = None, + bool IsABIRegCopy = false) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, - PartVT, ValueVT, V); + PartVT, ValueVT, V, IsABIRegCopy); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -258,7 +260,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, /// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V) { + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -269,9 +272,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -300,9 +312,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. + EVT BuiltVectorTy = + EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), + (IntermediateVT.isVector() + ? IntermediateVT.getVectorNumElements() * NumParts + : NumIntermediates)); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, ValueVT, Ops); + DL, BuiltVectorTy, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -341,13 +358,29 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, - "non-trivial scalar-to-vector conversion"); - return DAG.getUNDEF(ValueVT); + // Certain ABIs require that vectors are passed as integers. For vectors + // are the same size, this is an obvious bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + // Bitcast Val back the original type and extract the corresponding + // vector we want. + unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); + EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), Elts); + Val = DAG.getBitcast(WiderVecType, Val); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + diagnosePossiblyInvalidConstraint( + *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); + return DAG.getUNDEF(ValueVT); } + // Handle cases such as i8 -> <1 x i1> EVT ValueSVT = ValueVT.getVectorElementType(); if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) @@ -358,7 +391,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V); + MVT PartVT, const Value *V, bool IsABIRegCopy); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for @@ -366,12 +399,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + ISD::NodeType ExtendKind = ISD::ANY_EXTEND, + bool IsABIRegCopy = false) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, + IsABIRegCopy); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -496,7 +531,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V) { + MVT PartVT, const Value *V, + bool IsABIRegCopy) { + EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -537,13 +574,20 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); - } else{ - // Vector -> scalar conversion. - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } else { + if (ValueVT.getVectorNumElements() == 1) { + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + } else { + assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + "lossy conversion of vector to scalar type"); + EVT IntermediateType = + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getBitcast(IntermediateType, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } } assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); @@ -555,15 +599,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + // Convert the vector to the appropiate type if necessary. + unsigned DestVectorNoElts = + NumIntermediates * + (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1); + EVT BuiltVectorTy = EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); + if (Val.getValueType() != BuiltVectorTy) + Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + // Split the vector into intermediate operands. SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -596,22 +656,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue() {} +RegsForValue::RegsForValue() { IsABIMangled = false; } RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, - EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + EVT valuevt, bool IsABIMangledValue) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), + RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty) { + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); + IsABIMangled = IsABIMangledValue; + for (EVT ValueVT : ValueVTs) { - unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); - MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); + unsigned NumRegs = IsABIMangledValue + ? TLI.getNumRegistersForCallingConv(Context, ValueVT) + : TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = IsABIMangledValue + ? TLI.getRegisterTypeForCallingConv(Context, ValueVT) + : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); + RegCount.push_back(NumRegs); Reg += NumRegs; } } @@ -632,8 +701,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = RegCount[Value]; + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -728,9 +799,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumParts = RegCount[Value]; + + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -953,10 +1026,16 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty); + DAG.getDataLayout(), InReg, Ty, IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); - Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, + V); resolveDanglingDebugInfo(V, Result); } @@ -1142,8 +1221,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType()); + Inst->getType(), IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1371,12 +1455,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI.getNumRegisters(Context, VT); - MVT PartVT = TLI.getRegisterType(Context, VT); + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, &I, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind, true); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -5998,20 +6082,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, } } -/// Return true if it only matters that the value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (const User *U : V->users()) { - if (const ICmpInst *IC = dyn_cast(U)) - if (IC->isEquality()) - if (const Constant *C = dyn_cast(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SelectionDAGBuilder &Builder) { @@ -6098,7 +6168,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I)) + if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I)) return false; // If the target has a fast compare for the given size, it will return a @@ -7126,8 +7196,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -7813,8 +7883,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; @@ -7863,7 +7935,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); if (Args[i].IsZExt) Flags.setZExt(); @@ -7918,8 +7994,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumParts = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -7949,7 +8026,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); + CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind, + true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -8049,12 +8127,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, - AssertOp)); + AssertOp, true)); CurReg += NumRegs; } @@ -8090,8 +8170,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is an InlineAsm we have to match the registers required, not the + // notional registers required by the type. + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType()); + V->getType(), IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -8333,7 +8420,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = + TLI->getABIAlignmentForCallingConv(ArgTy, DL); if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); @@ -8395,8 +8487,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); - MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, ArgNo, PartBase+i*RegisterVT.getStoreSize()); @@ -8500,8 +8594,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumParts = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); // Even an apparant 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the @@ -8514,7 +8610,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, - PartVT, VT, nullptr, AssertOp)); + PartVT, VT, nullptr, AssertOp, + true)); } i += NumParts; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 77e131fa551c..431d52b4b9b9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -975,18 +975,28 @@ struct RegsForValue { /// expanded value requires multiple registers. SmallVector Regs; + /// This list holds the number of registers for each value. + SmallVector RegCount; + + /// Records if this value needs to be treated in an ABI dependant manner, + /// different to normal type legalization. + bool IsABIMangled; + RegsForValue(); - RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt); + RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt, + bool IsABIMangledValue = false); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty); + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue = false); /// Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + RegCount.push_back(RHS.Regs.size()); } /// Emit a series of CopyFromReg nodes that copies from this value and returns diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 0dbd9e846aa6..3dd58975b1f1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b5ccd64ee76c..b67f11f85b70 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===// // // The LLVM Compiler Infrastructure // @@ -17,11 +17,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -31,6 +31,7 @@ #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -38,7 +39,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -51,9 +51,11 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" @@ -64,6 +66,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -89,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -333,11 +337,12 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't -/// execute the possibly trapping instruction. -/// +/// execute the possibly trapping instruction. If available, we pass a +/// dominator tree to be updated when we split critical edges. This is because +/// SelectionDAGISel preserves the DominatorTree. /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn) { +static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) { // Loop for blocks with phi nodes. for (BasicBlock &BB : Fn) { PHINode *PN = dyn_cast(BB.begin()); @@ -363,7 +368,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn) { // Okay, we have to split this edge. SplitCriticalEdge( Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), - CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); + CriticalEdgeSplittingOptions(DT).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -399,10 +404,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LibInfo = &getAnalysis().getTLI(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; ORE = make_unique(&Fn); + auto *DTWP = getAnalysisIfAvailable(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast(Fn)); + SplitCriticalSideEffectEdges(const_cast(Fn), DT); CurDAG->init(*MF, *ORE); FuncInfo->set(Fn, *MF, CurDAG); @@ -763,7 +770,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - } { @@ -1134,7 +1140,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { // Check if the variable is a static alloca or a byval or inalloca // argument passed in memory. If it is not, then we will ignore this // intrinsic and handle this during isel like dbg.value. - int FI = INT_MAX; + int FI = std::numeric_limits::max(); if (const auto *AI = dyn_cast(Address)) { auto SI = FuncInfo->StaticAllocaMap.find(AI); if (SI != FuncInfo->StaticAllocaMap.end()) @@ -1142,7 +1148,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { } else if (const auto *Arg = dyn_cast(Address)) FI = FuncInfo->getArgumentFrameIndex(Arg); - if (FI == INT_MAX) + if (FI == std::numeric_limits::max()) continue; DIExpression *Expr = DI->getExpression(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 2764688518c2..11561dfa5947 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index eed667dbe7e0..5d78bba86d73 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/CallingConv.h" @@ -840,7 +840,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // completely and make statepoint call to return a tuple. unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy); + DAG.getDataLayout(), Reg, RetTy, true); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index adb2b188265b..cfda0fffd031 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2166,7 +2166,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); if (N0.getOperand(1) == N1.getOperand(1)) return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); - if (DAG.isCommutativeBinOp(N0.getOpcode())) { + if (isCommutativeBinOp(N0.getOpcode())) { // If X op Y == Y op X, try other combinations. if (N0.getOperand(0) == N1.getOperand(1)) return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), @@ -2230,7 +2230,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(1), DAG.getConstant(0, dl, N0.getValueType()), Cond); if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) + if (isCommutativeBinOp(N0.getOpcode())) return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(0, dl, N0.getValueType()), Cond); @@ -2257,7 +2257,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N1.getOperand(1), DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getOperand(1) == N0) { - if (DAG.isCommutativeBinOp(N1.getOpcode())) + if (isCommutativeBinOp(N1.getOpcode())) return DAG.getSetCC(dl, VT, N1.getOperand(0), DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index 6750fde57638..7b60d22c7ace 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -16,9 +16,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 09e9c3bb3354..7886737b879c 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 916b6f08c1b9..b4fa29d9a86b 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/StackMaps.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" @@ -15,7 +16,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index ca8bde2d114a..d8e7840a2576 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -28,6 +29,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" @@ -58,6 +60,7 @@ static cl::opt EnableSelectionDAGSP("enable-selectiondag-sp", cl::init(true), cl::Hidden); char StackProtector::ID = 0; + INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE, "Insert stack protectors", false, true) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) @@ -92,6 +95,11 @@ void StackProtector::adjustForColoring(const AllocaInst *From, } } +void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); +} + bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index d1758ecbd79f..856bca19dee8 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index ad0b04373656..489a607eb176 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -1,4 +1,4 @@ -//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" + using namespace llvm; #define DEBUG_TYPE "tailduplication" namespace { + /// Perform tail duplication. Delegates to TailDuplicator class TailDuplicatePass : public MachineFunctionPass { TailDuplicator Duplicator; public: static char ID; + explicit TailDuplicatePass() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -35,8 +38,9 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; +} // end anonymous namespace + char TailDuplicatePass::ID = 0; -} char &llvm::TailDuplicateID = TailDuplicatePass::ID; diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index d40f7af431a9..dc7265dcf6c2 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -1,4 +1,4 @@ -//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===// +//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,36 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "tailduplication" @@ -41,15 +55,13 @@ STATISTIC(NumTailDupRemoved, STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumAddedPHIs, "Number of phis added"); -namespace llvm { - // Heuristic for tail duplication. static cl::opt TailDuplicateSize( "tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden); -cl::opt TailDupIndirectBranchSize( +static cl::opt TailDupIndirectBranchSize( "tail-dup-indirect-size", cl::desc("Maximum instructions to consider tail duplicating blocks that " "end with indirect branches."), cl::init(20), @@ -138,7 +150,7 @@ bool TailDuplicator::tailDuplicateAndUpdate( bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds, - llvm::function_ref *RemovalCallback) { + function_ref *RemovalCallback) { // Save the successors list. SmallSetVector Succs(MBB->succ_begin(), MBB->succ_end()); @@ -971,7 +983,7 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB, /// the CFG. void TailDuplicator::removeDeadBlock( MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback) { + function_ref *RemovalCallback) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); @@ -985,5 +997,3 @@ void TailDuplicator::removeDeadBlock( // Remove the block. MBB->eraseFromParent(); } - -} // End llvm namespace diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index e5def6752e07..9dd98b4020d2 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -1,4 +1,4 @@ -//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// +//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==// // // The LLVM Compiler Infrastructure // @@ -14,19 +14,21 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Compiler.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include + using namespace llvm; -TargetFrameLowering::~TargetFrameLowering() { -} +TargetFrameLowering::~TargetFrameLowering() = default; /// The default implementation just looks at attribute "no-frame-pointer-elim". bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index c43a5e18ad23..581cfaf60755 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -34,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -1637,8 +1637,10 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr, VT = MinVT; } - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned NumParts = + TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT); + MVT PartVT = + TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 1d232c71d824..a0c68e1dcce8 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -12,14 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -48,11 +52,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include @@ -61,10 +61,53 @@ using namespace llvm; using namespace dwarf; +static void GetObjCImageInfo(ArrayRef ModuleFlags, + unsigned &Version, unsigned &Flags, + StringRef &Section) { + for (const auto &MFE: ModuleFlags) { + // Ignore flags with 'Require' behaviour. + if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + if (Key == "Objective-C Image Info Version") { + Version = mdconst::extract(MFE.Val)->getZExtValue(); + } else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated" || + Key == "Objective-C Class Properties" || + Key == "Objective-C Image Swift Version") { + Flags |= mdconst::extract(MFE.Val)->getZExtValue(); + } else if (Key == "Objective-C Image Info Section") { + Section = cast(MFE.Val)->getString(); + } + } +} + //===----------------------------------------------------------------------===// // ELF //===----------------------------------------------------------------------===// +void TargetLoweringObjectFileELF::emitModuleFlags( + MCStreamer &Streamer, ArrayRef ModuleFlags, + const TargetMachine &TM) const { + unsigned Version = 0; + unsigned Flags = 0; + StringRef Section; + + GetObjCImageInfo(ModuleFlags, Version, Flags, Section); + if (Section.empty()) + return; + + auto &C = getContext(); + auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + Streamer.AddBlankLine(); +} + MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { @@ -248,6 +291,25 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef SectionName = GO->getSection(); + // Check if '#pragma clang section' name is applicable. + // Note that pragma directive overrides -ffunction-section, -fdata-section + // and so section name is exactly as user specified and not uniqued. + const GlobalVariable *GV = dyn_cast(GO); + if (GV && GV->hasImplicitSection()) { + auto Attrs = GV->getAttributes(); + if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) { + SectionName = Attrs.getAttribute("bss-section").getValueAsString(); + } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) { + SectionName = Attrs.getAttribute("rodata-section").getValueAsString(); + } else if (Attrs.hasAttribute("data-section") && Kind.isData()) { + SectionName = Attrs.getAttribute("data-section").getValueAsString(); + } + } + const Function *F = dyn_cast(GO); + if (F && F->hasFnAttribute("implicit-section-name")) { + SectionName = F->getFnAttribute("implicit-section-name").getValueAsString(); + } + // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); @@ -560,32 +622,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, void TargetLoweringObjectFileMachO::emitModuleFlags( MCStreamer &Streamer, ArrayRef ModuleFlags, const TargetMachine &TM) const { - unsigned VersionVal = 0; - unsigned ImageInfoFlags = 0; MDNode *LinkerOptions = nullptr; - StringRef SectionVal; for (const auto &MFE : ModuleFlags) { - // Ignore flags with 'Require' behavior. - if (MFE.Behavior == Module::Require) - continue; - StringRef Key = MFE.Key->getString(); - Metadata *Val = MFE.Val; - - if (Key == "Objective-C Image Info Version") { - VersionVal = mdconst::extract(Val)->getZExtValue(); - } else if (Key == "Objective-C Garbage Collection" || - Key == "Objective-C GC Only" || - Key == "Objective-C Is Simulated" || - Key == "Objective-C Class Properties" || - Key == "Objective-C Image Swift Version") { - ImageInfoFlags |= mdconst::extract(Val)->getZExtValue(); - } else if (Key == "Objective-C Image Info Section") { - SectionVal = cast(Val)->getString(); - } else if (Key == "Linker Options") { - LinkerOptions = cast(Val); - } + if (Key == "Linker Options") + LinkerOptions = cast(MFE.Val); } // Emit the linker options if present. @@ -598,8 +640,14 @@ void TargetLoweringObjectFileMachO::emitModuleFlags( } } + unsigned VersionVal = 0; + unsigned ImageInfoFlags = 0; + StringRef SectionVal; + GetObjCImageInfo(ModuleFlags, VersionVal, ImageInfoFlags, SectionVal); + // The section is mandatory. If we don't have it, then we don't have GC info. - if (SectionVal.empty()) return; + if (SectionVal.empty()) + return; StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; @@ -1137,6 +1185,24 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags( } } } + + unsigned Version = 0; + unsigned Flags = 0; + StringRef Section; + + GetObjCImageInfo(ModuleFlags, Version, Flags, Section); + if (Section.empty()) + return; + + auto &C = getContext(); + auto *S = C.getCOFFSection( + Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + Streamer.AddBlankLine(); } void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index c20d5ab814f8..ed845e1706f8 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 72d5e995ac22..b1918b19e1df 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" @@ -95,6 +96,16 @@ static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, static cl::opt EnableMachineOutliner("enable-machine-outliner", cl::Hidden, cl::desc("Enable machine outliner")); +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. +static cl::opt +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); + +static cl::opt + EnableGlobalISel("global-isel", cl::Hidden, + cl::desc("Enable the \"global\" instruction selector")); static cl::opt PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, @@ -571,6 +582,66 @@ void TargetPassConfig::addISelPrepare() { addPass(createVerifierPass()); } +bool TargetPassConfig::addCoreISelPasses() { + // Enable FastISel with -fast, but allow that to be overridden. + TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); + if (EnableFastISelOption == cl::BOU_TRUE || + (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())) + TM->setFastISel(true); + + // Ask the target for an isel. + // Enable GlobalISel if the target wants to, but allow that to be overriden. + if (EnableGlobalISel == cl::BOU_TRUE || + (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) { + if (addIRTranslator()) + return true; + + addPreLegalizeMachineIR(); + + if (addLegalizeMachineIR()) + return true; + + // Before running the register bank selector, ask the target if it + // wants to run some passes. + addPreRegBankSelect(); + + if (addRegBankSelect()) + return true; + + addPreGlobalInstructionSelect(); + + if (addGlobalInstructionSelect()) + return true; + + // Pass to reset the MachineFunction if the ISel failed. + addPass(createResetMachineFunctionPass( + reportDiagnosticWhenGlobalISelFallback(), isGlobalISelAbortEnabled())); + + // Provide a fallback path when we do not want to abort on + // not-yet-supported input. + if (!isGlobalISelAbortEnabled() && addInstSelector()) + return true; + + } else if (addInstSelector()) + return true; + + return false; +} + +bool TargetPassConfig::addISelPasses() { + if (TM->Options.EmulatedTLS) + addPass(createLowerEmuTLSPass()); + + addPass(createPreISelIntrinsicLoweringPass()); + addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + addIRPasses(); + addCodeGenPrepare(); + addPassesToHandleExceptions(); + addISelPrepare(); + + return addCoreISelPasses(); +} + /// -regalloc=... command line option. static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } static cl::opt -getRTroughputFromItineraries(unsigned schedClass, - const InstrItineraryData *IID){ +getRThroughputFromItineraries(unsigned schedClass, + const InstrItineraryData *IID){ double Unknown = std::numeric_limits::infinity(); double Throughput = Unknown; @@ -356,9 +356,9 @@ getRTroughputFromItineraries(unsigned schedClass, } static Optional -getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, - const TargetSubtargetInfo *STI, - const MCSchedModel &SchedModel) { +getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, + const TargetSubtargetInfo *STI, + const MCSchedModel &SchedModel) { double Unknown = std::numeric_limits::infinity(); double Throughput = Unknown; @@ -380,11 +380,11 @@ getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, Optional TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const { if (hasInstrItineraries()) - return getRTroughputFromItineraries(MI->getDesc().getSchedClass(), - getInstrItineraries()); + return getRThroughputFromItineraries(MI->getDesc().getSchedClass(), + getInstrItineraries()); if (hasInstrSchedModel()) - return getRTroughputFromInstrSchedModel(resolveSchedClass(MI), STI, - SchedModel); + return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI, + SchedModel); return Optional(); } @@ -392,11 +392,11 @@ Optional TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const { unsigned SchedClass = TII->get(Opcode).getSchedClass(); if (hasInstrItineraries()) - return getRTroughputFromItineraries(SchedClass, getInstrItineraries()); + return getRThroughputFromItineraries(SchedClass, getInstrItineraries()); if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); if (SCDesc->isValid() && !SCDesc->isVariant()) - return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel); + return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel); } return Optional(); } diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index 0a444e0fff07..82e85bab1474 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; //--------------------------------------------------------------------------- diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index d10ca1a7ff91..124c2790f68c 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -72,6 +72,17 @@ void VirtRegMap::grow() { Virt2SplitMap.resize(NumRegs); } +void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg) && + TargetRegisterInfo::isPhysicalRegister(physReg)); + assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && + "attempt to assign physical register to already mapped " + "virtual register"); + assert(!getRegInfo().isReserved(physReg) && + "Attempt to map virtReg to a reserved physReg"); + Virt2PhysMap[virtReg] = physReg; +} + unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { unsigned Size = TRI->getSpillSize(*RC); unsigned Align = TRI->getSpillAlignment(*RC); diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index 4e7542bf31e0..c63a0a9e60ea 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -16,13 +16,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCSymbol.h" diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 2df3602733f3..1a8d5a4f45da 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -1,4 +1,4 @@ -//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===// +//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===// // // The LLVM Compiler Infrastructure // @@ -14,20 +14,26 @@ // //===---------------------------------------------------------------------===// -#include "llvm/CodeGen/Analysis.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { + struct XRayInstrumentation : public MachineFunctionPass { static char ID; @@ -66,7 +72,8 @@ private: void prependRetWithPatchableExit(MachineFunction &MF, const TargetInstrInfo *TII); }; -} // anonymous namespace + +} // end anonymous namespace void XRayInstrumentation::replaceRetWithPatchableRet( MachineFunction &MF, const TargetInstrInfo *TII) { @@ -134,18 +141,23 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) return false; // Invalid value for threshold. + // Count the number of MachineInstr`s in MachineFunction + int64_t MICount = 0; + for (const auto& MBB : MF) + MICount += MBB.size(); + // Check if we have a loop. // FIXME: Maybe make this smarter, and see whether the loops are dependent // on inputs or side-effects? MachineLoopInfo &MLI = getAnalysis(); - if (MLI.empty() && F.size() < XRayThreshold) + if (MLI.empty() && MICount < XRayThreshold) return false; // Function is too small and has no loops. } // We look for the first non-empty MachineBasicBlock, so that we can insert // the function instrumentation in the appropriate place. - auto MBI = - find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); + auto MBI = llvm::find_if( + MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); if (MBI == MF.end()) return false; // The function is empty. diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index 410b89bc949e..2f9e8981b698 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -3,11 +3,9 @@ add_llvm_library(LLVMDebugInfoCodeView CodeViewRecordIO.cpp CVSymbolVisitor.cpp CVTypeVisitor.cpp - EnumTables.cpp - Formatters.cpp - LazyRandomTypeCollection.cpp - Line.cpp DebugChecksumsSubsection.cpp + DebugCrossExSubsection.cpp + DebugCrossImpSubsection.cpp DebugFrameDataSubsection.cpp DebugInlineeLinesSubsection.cpp DebugLinesSubsection.cpp @@ -15,7 +13,12 @@ add_llvm_library(LLVMDebugInfoCodeView DebugSubsection.cpp DebugSubsectionRecord.cpp DebugSubsectionVisitor.cpp + DebugSymbolRVASubsection.cpp DebugSymbolsSubsection.cpp + EnumTables.cpp + Formatters.cpp + LazyRandomTypeCollection.cpp + Line.cpp RecordSerialization.cpp SymbolRecordMapping.cpp SymbolDumper.cpp diff --git a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp index 1a85a339f8c3..c31b8d1c96d5 100644 --- a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp +++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp @@ -25,8 +25,8 @@ struct FileChecksumEntryHeader { // Checksum bytes follow. }; -Error llvm::VarStreamArrayExtractor::extract( - BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) { +Error llvm::VarStreamArrayExtractor:: +operator()(BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) { BinaryStreamReader Reader(Stream); const FileChecksumEntryHeader *Header; diff --git a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp new file mode 100644 index 000000000000..21e2cc56075b --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp @@ -0,0 +1,51 @@ +//===- DebugCrossExSubsection.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error DebugCrossModuleExportsSubsectionRef::initialize( + BinaryStreamReader Reader) { + if (Reader.bytesRemaining() % sizeof(CrossModuleExport) != 0) + return make_error( + cv_error_code::corrupt_record, + "Cross Scope Exports section is an invalid size!"); + + uint32_t Size = Reader.bytesRemaining() / sizeof(CrossModuleExport); + return Reader.readArray(References, Size); +} + +Error DebugCrossModuleExportsSubsectionRef::initialize(BinaryStreamRef Stream) { + BinaryStreamReader Reader(Stream); + return initialize(Reader); +} + +void DebugCrossModuleExportsSubsection::addMapping(uint32_t Local, + uint32_t Global) { + Mappings[Local] = Global; +} + +uint32_t DebugCrossModuleExportsSubsection::calculateSerializedSize() const { + return Mappings.size() * sizeof(CrossModuleExport); +} + +Error DebugCrossModuleExportsSubsection::commit( + BinaryStreamWriter &Writer) const { + for (const auto &M : Mappings) { + if (auto EC = Writer.writeInteger(M.first)) + return EC; + if (auto EC = Writer.writeInteger(M.second)) + return EC; + } + return Error::success(); +} diff --git a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp new file mode 100644 index 000000000000..2c4a0b779342 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp @@ -0,0 +1,91 @@ +//===- DebugCrossImpSubsection.cpp ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace llvm { +Error VarStreamArrayExtractor:: +operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::CrossModuleImportItem &Item) { + BinaryStreamReader Reader(Stream); + if (Reader.bytesRemaining() < sizeof(CrossModuleImport)) + return make_error( + cv_error_code::insufficient_buffer, + "Not enough bytes for a Cross Module Import Header!"); + if (auto EC = Reader.readObject(Item.Header)) + return EC; + if (Reader.bytesRemaining() < Item.Header->Count * sizeof(uint32_t)) + return make_error( + cv_error_code::insufficient_buffer, + "Not enough to read specified number of Cross Module References!"); + if (auto EC = Reader.readArray(Item.Imports, Item.Header->Count)) + return EC; + return Error::success(); +} +} + +Error DebugCrossModuleImportsSubsectionRef::initialize( + BinaryStreamReader Reader) { + return Reader.readArray(References, Reader.bytesRemaining()); +} + +Error DebugCrossModuleImportsSubsectionRef::initialize(BinaryStreamRef Stream) { + BinaryStreamReader Reader(Stream); + return initialize(Reader); +} + +void DebugCrossModuleImportsSubsection::addImport(StringRef Module, + uint32_t ImportId) { + Strings.insert(Module); + std::vector Targets = {support::ulittle32_t(ImportId)}; + auto Result = Mappings.insert(std::make_pair(Module, Targets)); + if (!Result.second) + Result.first->getValue().push_back(Targets[0]); +} + +uint32_t DebugCrossModuleImportsSubsection::calculateSerializedSize() const { + uint32_t Size = 0; + for (const auto &Item : Mappings) { + Size += sizeof(CrossModuleImport); + Size += sizeof(support::ulittle32_t) * Item.second.size(); + } + return Size; +} + +Error DebugCrossModuleImportsSubsection::commit( + BinaryStreamWriter &Writer) const { + using T = decltype(&*Mappings.begin()); + std::vector Ids; + Ids.reserve(Mappings.size()); + + for (const auto &M : Mappings) + Ids.push_back(&M); + + std::sort(Ids.begin(), Ids.end(), [this](const T &L1, const T &L2) { + return Strings.getStringId(L1->getKey()) < + Strings.getStringId(L2->getKey()); + }); + + for (const auto &Item : Ids) { + CrossModuleImport Imp; + Imp.ModuleNameOffset = Strings.getStringId(Item->getKey()); + Imp.Count = Item->getValue().size(); + if (auto EC = Writer.writeObject(Imp)) + return EC; + if (auto EC = Writer.writeArray(makeArrayRef(Item->getValue()))) + return EC; + } + return Error::success(); +} diff --git a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp index 520a0ee4454f..e7719d05dbdc 100644 --- a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp +++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp @@ -17,9 +17,8 @@ using namespace llvm; using namespace llvm::codeview; -Error VarStreamArrayExtractor::extract( - BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item, - bool HasExtraFiles) { +Error VarStreamArrayExtractor:: +operator()(BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item) { BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Item.Header)) @@ -44,8 +43,8 @@ Error DebugInlineeLinesSubsectionRef::initialize(BinaryStreamReader Reader) { if (auto EC = Reader.readEnum(Signature)) return EC; - if (auto EC = - Reader.readArray(Lines, Reader.bytesRemaining(), hasExtraFiles())) + Lines.getExtractor().HasExtraFiles = hasExtraFiles(); + if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining())) return EC; assert(Reader.bytesRemaining() == 0); diff --git a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp index 2fce06ca2a17..fbcad61d60a6 100644 --- a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp +++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp @@ -17,9 +17,8 @@ using namespace llvm; using namespace llvm::codeview; -Error LineColumnExtractor::extract(BinaryStreamRef Stream, uint32_t &Len, - LineColumnEntry &Item, - const LineFragmentHeader *Header) { +Error LineColumnExtractor::operator()(BinaryStreamRef Stream, uint32_t &Len, + LineColumnEntry &Item) { using namespace codeview; const LineBlockFragmentHeader *BlockHeader; BinaryStreamReader Reader(Stream); @@ -56,8 +55,8 @@ Error DebugLinesSubsectionRef::initialize(BinaryStreamReader Reader) { if (auto EC = Reader.readObject(Header)) return EC; - if (auto EC = - Reader.readArray(LinesAndColumns, Reader.bytesRemaining(), Header)) + LinesAndColumns.getExtractor().Header = Header; + if (auto EC = Reader.readArray(LinesAndColumns, Reader.bytesRemaining())) return EC; return Error::success(); @@ -145,7 +144,7 @@ uint32_t DebugLinesSubsection::calculateSerializedSize() const { } void DebugLinesSubsection::setRelocationAddress(uint16_t Segment, - uint16_t Offset) { + uint32_t Offset) { RelocOffset = Offset; RelocSegment = Segment; } diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp index 2e72242181b0..6e647c4b976b 100644 --- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp +++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp @@ -23,6 +23,9 @@ Error DebugStringTableSubsectionRef::initialize(BinaryStreamRef Contents) { Stream = Contents; return Error::success(); } +Error DebugStringTableSubsectionRef::initialize(BinaryStreamReader &Reader) { + return Reader.readStreamRef(Stream); +} Expected DebugStringTableSubsectionRef::getString(uint32_t Offset) const { @@ -52,20 +55,19 @@ uint32_t DebugStringTableSubsection::calculateSerializedSize() const { } Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const { - assert(Writer.bytesRemaining() == StringSize); - uint32_t MaxOffset = 1; + uint32_t Begin = Writer.getOffset(); + uint32_t End = Begin + StringSize; for (auto &Pair : Strings) { StringRef S = Pair.getKey(); - uint32_t Offset = Pair.getValue(); + uint32_t Offset = Begin + Pair.getValue(); Writer.setOffset(Offset); if (auto EC = Writer.writeCString(S)) return EC; - MaxOffset = std::max(MaxOffset, Offset + S.size() + 1); + assert(Writer.getOffset() <= End); } - Writer.setOffset(MaxOffset); - assert(Writer.bytesRemaining() == 0); + Writer.setOffset(End); return Error::success(); } diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp index cfd1c5d3ab0c..e9124e68fe82 100644 --- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp +++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -34,14 +34,6 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, DebugSubsectionKind Kind = static_cast(uint32_t(Header->Kind)); - switch (Kind) { - case DebugSubsectionKind::FileChecksums: - case DebugSubsectionKind::Lines: - case DebugSubsectionKind::InlineeLines: - break; - default: - llvm_unreachable("Unexpected debug fragment kind!"); - } if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) return EC; Info.Container = Container; @@ -50,9 +42,7 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, } uint32_t DebugSubsectionRecord::getRecordLength() const { - uint32_t Result = sizeof(DebugSubsectionHeader) + Data.getLength(); - assert(Result % alignOf(Container) == 0); - return Result; + return sizeof(DebugSubsectionHeader) + Data.getLength(); } DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; } @@ -64,25 +54,29 @@ DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( : Subsection(std::move(Subsection)), Container(Container) {} uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { - uint32_t Size = - sizeof(DebugSubsectionHeader) + - alignTo(Subsection->calculateSerializedSize(), alignOf(Container)); + // The length of the entire subsection is always padded to 4 bytes, regardless + // of the container kind. + uint32_t Size = sizeof(DebugSubsectionHeader) + + alignTo(Subsection->calculateSerializedSize(), 4); return Size; } -Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) { +Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const { assert(Writer.getOffset() % alignOf(Container) == 0 && "Debug Subsection not properly aligned"); DebugSubsectionHeader Header; Header.Kind = uint32_t(Subsection->kind()); - Header.Length = calculateSerializedLength() - sizeof(DebugSubsectionHeader); + // The value written into the Header's Length field is only padded to the + // container's alignment + Header.Length = + alignTo(Subsection->calculateSerializedSize(), alignOf(Container)); if (auto EC = Writer.writeObject(Header)) return EC; if (auto EC = Subsection->commit(Writer)) return EC; - if (auto EC = Writer.padToAlignment(alignOf(Container))) + if (auto EC = Writer.padToAlignment(4)) return EC; return Error::success(); diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp index f2c4dea8685f..8550107741ce 100644 --- a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp +++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp @@ -1,4 +1,4 @@ -//===- DebugSubsectionVisitor.cpp ---------------------------*- C++ -*-===// +//===- DebugSubsectionVisitor.cpp -------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,9 +10,15 @@ #include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" #include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" @@ -20,8 +26,40 @@ using namespace llvm; using namespace llvm::codeview; +DebugSubsectionState::DebugSubsectionState() {} + +DebugSubsectionState::DebugSubsectionState( + const DebugStringTableSubsectionRef &Strings) + : Strings(&Strings) {} + +DebugSubsectionState::DebugSubsectionState( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) + : Strings(&Strings), Checksums(&Checksums) {} + +void DebugSubsectionState::initializeStrings(const DebugSubsectionRecord &SR) { + assert(SR.kind() == DebugSubsectionKind::StringTable); + assert(!Strings && "Found a string table even though we already have one!"); + + OwnedStrings = llvm::make_unique(); + consumeError(OwnedStrings->initialize(SR.getRecordData())); + Strings = OwnedStrings.get(); +} + +void DebugSubsectionState::initializeChecksums( + const DebugSubsectionRecord &FCR) { + assert(FCR.kind() == DebugSubsectionKind::FileChecksums); + if (Checksums) + return; + + OwnedChecksums = llvm::make_unique(); + consumeError(OwnedChecksums->initialize(FCR.getRecordData())); + Checksums = OwnedChecksums.get(); +} + Error llvm::codeview::visitDebugSubsection(const DebugSubsectionRecord &R, - DebugSubsectionVisitor &V) { + DebugSubsectionVisitor &V, + const DebugSubsectionState &State) { BinaryStreamReader Reader(R.getRecordData()); switch (R.kind()) { case DebugSubsectionKind::Lines: { @@ -29,20 +67,56 @@ Error llvm::codeview::visitDebugSubsection(const DebugSubsectionRecord &R, if (auto EC = Fragment.initialize(Reader)) return EC; - return V.visitLines(Fragment); + return V.visitLines(Fragment, State); } case DebugSubsectionKind::FileChecksums: { DebugChecksumsSubsectionRef Fragment; if (auto EC = Fragment.initialize(Reader)) return EC; - return V.visitFileChecksums(Fragment); + return V.visitFileChecksums(Fragment, State); } case DebugSubsectionKind::InlineeLines: { DebugInlineeLinesSubsectionRef Fragment; if (auto EC = Fragment.initialize(Reader)) return EC; - return V.visitInlineeLines(Fragment); + return V.visitInlineeLines(Fragment, State); + } + case DebugSubsectionKind::CrossScopeExports: { + DebugCrossModuleExportsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCrossModuleExports(Section, State); + } + case DebugSubsectionKind::CrossScopeImports: { + DebugCrossModuleImportsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCrossModuleImports(Section, State); + } + case DebugSubsectionKind::Symbols: { + DebugSymbolsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitSymbols(Section, State); + } + case DebugSubsectionKind::StringTable: { + DebugStringTableSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitStringTable(Section, State); + } + case DebugSubsectionKind::FrameData: { + DebugFrameDataSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitFrameData(Section, State); + } + case DebugSubsectionKind::CoffSymbolRVA: { + DebugSymbolRVASubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCOFFSymbolRVAs(Section, State); } default: { DebugUnknownSubsectionRef Fragment(R.kind(), R.getRecordData()); diff --git a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp new file mode 100644 index 000000000000..5f91b68f3ad8 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp @@ -0,0 +1,31 @@ +//===- DebugSymbolRVASubsection.cpp ------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" + +using namespace llvm; +using namespace llvm::codeview; + +DebugSymbolRVASubsectionRef::DebugSymbolRVASubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CoffSymbolRVA) {} + +Error DebugSymbolRVASubsectionRef::initialize(BinaryStreamReader &Reader) { + return Reader.readArray(RVAs, Reader.bytesRemaining() / sizeof(uint32_t)); +} + +DebugSymbolRVASubsection::DebugSymbolRVASubsection() + : DebugSubsection(DebugSubsectionKind::CoffSymbolRVA) {} + +Error DebugSymbolRVASubsection::commit(BinaryStreamWriter &Writer) const { + return Writer.writeArray(makeArrayRef(RVAs)); +} + +uint32_t DebugSymbolRVASubsection::calculateSerializedSize() const { + return RVAs.size() * sizeof(uint32_t); +} diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp index 699694fde928..8d974d522f28 100644 --- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -51,7 +51,8 @@ void TypeTableCollection::ensureTypeExists(TypeIndex Index) { CVType Type; uint32_t Len; - error(VarStreamArrayExtractor::extract(Bytes, Len, Type)); + VarStreamArrayExtractor Extract; + error(Extract(Bytes, Len, Type)); TypeDatabaseVisitor DBV(Database); error(codeview::visitTypeRecord(Type, Index, DBV)); diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index e7b4b777b43f..57eac91f8c19 100644 --- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" + #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index a12f8adfafe5..97b52f0fbdd6 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index 6e550f2e9ec9..358e9bf43d00 100644 --- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -15,7 +15,7 @@ using namespace llvm; -void DWARFCompileUnit::dump(raw_ostream &OS) { +void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { OS << format("0x%08x", getOffset()) << ": Compile Unit:" << " length = " << format("0x%08x", getLength()) << " version = " << format("0x%04x", getVersion()); @@ -27,7 +27,7 @@ void DWARFCompileUnit::dump(raw_ostream &OS) { << ")\n"; if (DWARFDie CUDie = getUnitDIE(false)) - CUDie.dump(OS, -1U); + CUDie.dump(OS, -1U, 0, DumpOpts); else OS << "\n\n"; } diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 1be156d6ea9b..42ab48808f9a 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -84,6 +84,123 @@ static void dumpAccelSection(raw_ostream &OS, StringRef Name, Accel.dump(OS); } +static void +dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, + const DWARFSection &StringOffsetsSection, + StringRef StringSection, bool LittleEndian) { + DataExtractor StrOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); + uint32_t Offset = 0; + uint64_t SectionSize = StringOffsetsSection.Data.size(); + + while (Offset < SectionSize) { + unsigned Version = 0; + DwarfFormat Format = DWARF32; + unsigned EntrySize = 4; + // Perform validation and extract the segment size from the header. + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + uint32_t ContributionStart = Offset; + uint64_t ContributionSize = StrOffsetExt.getU32(&Offset); + // A contribution size of 0xffffffff indicates DWARF64, with the actual size + // in the following 8 bytes. Otherwise, the DWARF standard mandates that + // the contribution size must be at most 0xfffffff0. + if (ContributionSize == 0xffffffff) { + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + Format = DWARF64; + EntrySize = 8; + ContributionSize = StrOffsetExt.getU64(&Offset); + } else if (ContributionSize > 0xfffffff0) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + + // We must ensure that we don't read a partial record at the end, so we + // validate for a multiple of EntrySize. Also, we're expecting a version + // number and padding, which adds an additional 4 bytes. + uint64_t ValidationSize = + 4 + ((ContributionSize + EntrySize - 1) & (-(uint64_t)EntrySize)); + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, ValidationSize)) { + OS << "error: contribution to string offsets table in section ." + << SectionName << " has invalid length.\n"; + return; + } + + Version = StrOffsetExt.getU16(&Offset); + Offset += 2; + OS << format("0x%8.8x: ", ContributionStart); + OS << "Contribution size = " << ContributionSize + << ", Version = " << Version << "\n"; + + uint32_t ContributionBase = Offset; + DataExtractor StrData(StringSection, LittleEndian, 0); + while (Offset - ContributionBase < ContributionSize) { + OS << format("0x%8.8x: ", Offset); + // FIXME: We can only extract strings in DWARF32 format at the moment. + uint64_t StringOffset = getRelocatedValue( + StrOffsetExt, EntrySize, &Offset, &StringOffsetsSection.Relocs); + if (Format == DWARF32) { + OS << format("%8.8x ", StringOffset); + uint32_t StringOffset32 = (uint32_t)StringOffset; + const char *S = StrData.getCStr(&StringOffset32); + if (S) + OS << format("\"%s\"", S); + } else + OS << format("%16.16x ", StringOffset); + OS << "\n"; + } + } +} + +// Dump a DWARF string offsets section. This may be a DWARF v5 formatted +// string offsets section, where each compile or type unit contributes a +// number of entries (string offsets), with each contribution preceded by +// a header containing size and version number. Alternatively, it may be a +// monolithic series of string offsets, as generated by the pre-DWARF v5 +// implementation of split DWARF. +static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, + const DWARFSection &StringOffsetsSection, + StringRef StringSection, bool LittleEndian, + unsigned MaxVersion) { + if (StringOffsetsSection.Data.empty()) + return; + OS << "\n." << SectionName << " contents:\n"; + // If we have at least one (compile or type) unit with DWARF v5 or greater, + // we assume that the section is formatted like a DWARF v5 string offsets + // section. + if (MaxVersion >= 5) + dumpDWARFv5StringOffsetsSection(OS, SectionName, StringOffsetsSection, + StringSection, LittleEndian); + else { + DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); + uint32_t offset = 0; + uint64_t size = StringOffsetsSection.Data.size(); + // Ensure that size is a multiple of the size of an entry. + if (size & ((uint64_t)(sizeof(uint32_t) - 1))) { + OS << "error: size of ." << SectionName << " is not a multiple of " + << sizeof(uint32_t) << ".\n"; + size &= -(uint64_t)sizeof(uint32_t); + } + DataExtractor StrData(StringSection, LittleEndian, 0); + while (offset < size) { + OS << format("0x%8.8x: ", offset); + uint32_t StringOffset = strOffsetExt.getU32(&offset); + OS << format("%8.8x ", StringOffset); + const char *S = StrData.getCStr(&StringOffset); + if (S) + OS << format("\"%s\"", S); + OS << "\n"; + } + } +} + void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){ DIDumpType DumpType = DumpOpts.DumpType; @@ -104,14 +221,14 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){ if (DumpType == DIDT_All || DumpType == DIDT_Info) { OS << "\n.debug_info contents:\n"; for (const auto &CU : compile_units()) - CU->dump(OS); + CU->dump(OS, DumpOpts); } if ((DumpType == DIDT_All || DumpType == DIDT_InfoDwo) && getNumDWOCompileUnits()) { OS << "\n.debug_info.dwo contents:\n"; for (const auto &DWOCU : dwo_compile_units()) - DWOCU->dump(OS); + DWOCU->dump(OS, DumpOpts); } if ((DumpType == DIDT_All || DumpType == DIDT_Types) && getNumTypeUnits()) { @@ -258,17 +375,15 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){ true /* GnuStyle */) .dump("debug_gnu_pubtypes", OS); - if ((DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) && - !getStringOffsetDWOSection().empty()) { - OS << "\n.debug_str_offsets.dwo contents:\n"; - DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), - 0); - offset = 0; - uint64_t size = getStringOffsetDWOSection().size(); - while (offset < size) { - OS << format("0x%8.8x: ", offset); - OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); - } + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsets) + dumpStringOffsetsSection(OS, "debug_str_offsets", getStringOffsetSection(), + getStringSection(), isLittleEndian(), + getMaxVersion()); + + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) { + dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", + getStringOffsetDWOSection(), getStringDWOSection(), + isLittleEndian(), getMaxVersion()); } if ((DumpType == DIDT_All || DumpType == DIDT_GdbIndex) && @@ -1109,6 +1224,10 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, TypesDWOSections[Section].Data = data; } + // Map platform specific debug section names to DWARF standard section + // names. + name = Obj.mapDebugSectionName(name); + if (RelocatedSection == Obj.section_end()) continue; @@ -1141,6 +1260,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, .Case("debug_loc", &LocSection.Relocs) .Case("debug_info.dwo", &InfoDWOSection.Relocs) .Case("debug_line", &LineSection.Relocs) + .Case("debug_str_offsets", &StringOffsetSection.Relocs) .Case("debug_ranges", &RangeSection.Relocs) .Case("debug_addr", &AddrSection.Relocs) .Case("apple_names", &AppleNamesSection.Relocs) @@ -1211,6 +1331,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { .Case("debug_frame", &DebugFrameSection) .Case("eh_frame", &EHFrameSection) .Case("debug_str", &StringSection) + .Case("debug_str_offsets", &StringOffsetSection.Data) .Case("debug_ranges", &RangeSection.Data) .Case("debug_macinfo", &MacinfoSection) .Case("debug_pubnames", &PubNamesSection) @@ -1222,7 +1343,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { .Case("debug_loc.dwo", &LocDWOSection.Data) .Case("debug_line.dwo", &LineDWOSection.Data) .Case("debug_str.dwo", &StringDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection.Data) .Case("debug_addr", &AddrSection.Data) .Case("apple_names", &AppleNamesSection.Data) .Case("apple_types", &AppleTypesSection.Data) diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index b55ed6a46849..e6e007896cc8 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -7,18 +7,19 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" + #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -584,6 +585,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) { switch (AugmentationString[i]) { default: ReportError("Unknown augmentation character in entry at %lx"); + llvm_unreachable("ReportError should not return."); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index 35f673c7acc6..dbcc64fc0832 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index f32e8fe76357..cda3e75fbc3e 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -9,10 +9,10 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index d5c34216ed53..2178bef65d1d 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index e0a9adde8e58..1b77be6192dd 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "SyntaxHighlighting.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" -#include "llvm/Support/Dwarf.h" +#include "SyntaxHighlighting.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp index daded255f8c7..5a4e39f3c2af 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index 6b5e1d3c931b..43201293fe60 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index fd45c77d3745..b4b682dd11b5 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "SyntaxHighlighting.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" -#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -67,7 +67,8 @@ static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges, static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, uint32_t *OffsetPtr, dwarf::Attribute Attr, - dwarf::Form Form, unsigned Indent) { + dwarf::Form Form, unsigned Indent, + DIDumpOptions DumpOpts) { if (!Die.isValid()) return; const char BaseIndent[] = " "; @@ -78,13 +79,15 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, WithColor(OS, syntax::Attribute) << attrString; else WithColor(OS, syntax::Attribute).get() << format("DW_AT_Unknown_%x", Attr); - - auto formString = FormEncodingString(Form); - if (!formString.empty()) - OS << " [" << formString << ']'; - else - OS << format(" [DW_FORM_Unknown_%x]", Form); - + + if (!DumpOpts.Brief) { + auto formString = FormEncodingString(Form); + if (!formString.empty()) + OS << " [" << formString << ']'; + else + OS << format(" [DW_FORM_Unknown_%x]", Form); + } + DWARFUnit *U = Die.getDwarfUnit(); DWARFFormValue formValue(Form); @@ -301,8 +304,8 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } -void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, - unsigned Indent) const { +void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, unsigned Indent, + DIDumpOptions DumpOpts) const { if (!isValid()) return; DataExtractor debug_info_data = U->getDebugInfoExtractor(); @@ -322,10 +325,12 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, else WithColor(OS, syntax::Tag).get().indent(Indent) << format("DW_TAG_Unknown_%x", getTag()); - - OS << format(" [%u] %c\n", abbrCode, - AbbrevDecl->hasChildren() ? '*' : ' '); - + + if (!DumpOpts.Brief) + OS << format(" [%u] %c", abbrCode, + AbbrevDecl->hasChildren() ? '*' : ' '); + OS << '\n'; + // Dump all data in the DIE for the attributes. for (const auto &AttrSpec : AbbrevDecl->attributes()) { if (AttrSpec.Form == DW_FORM_implicit_const) { @@ -335,13 +340,13 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, continue; } dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form, - Indent); + Indent, DumpOpts); } DWARFDie child = getFirstChild(); if (RecurseDepth > 0 && child) { while (child) { - child.dump(OS, RecurseDepth-1, Indent+2); + child.dump(OS, RecurseDepth-1, Indent+2, DumpOpts); child = child.getSibling(); } } diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 0963d7bfd713..ed1f5f46dcfb 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -13,10 +13,10 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -301,6 +301,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { return (FC == FC_Address); case DW_FORM_GNU_str_index: case DW_FORM_GNU_strp_alt: + case DW_FORM_strx: return (FC == FC_String); case DW_FORM_implicit_const: return (FC == FC_Constant); @@ -415,6 +416,7 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, break; case DW_FORM_GNU_addr_index: case DW_FORM_GNU_str_index: + case DW_FORM_strx: Value.uval = Data.getULEB128(OffsetPtr); break; default: @@ -542,6 +544,7 @@ void DWARFFormValue::dump(raw_ostream &OS) const { OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)UValue); dumpString(OS); break; + case DW_FORM_strx: case DW_FORM_GNU_str_index: OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue); dumpString(OS); @@ -620,10 +623,11 @@ Optional DWARFFormValue::getAsCString() const { if (Form == DW_FORM_GNU_strp_alt || U == nullptr) return None; uint32_t Offset = Value.uval; - if (Form == DW_FORM_GNU_str_index) { - uint32_t StrOffset; + if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx) { + uint64_t StrOffset; if (!U->getStringOffsetSectionItem(Offset, StrOffset)) return None; + StrOffset += U->getStringOffsetSectionRelocation(Offset); Offset = StrOffset; } if (const char *Str = U->getStringExtractor().getCStr(&Offset)) { diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index 0625d01097c9..ebd6104ab878 100644 --- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp index 25824f6eb83b..fd1684d33a16 100644 --- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" -#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index c5add6a478b3..09e6a292e5fe 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -16,7 +17,6 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" @@ -33,8 +33,9 @@ using namespace dwarf; void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(), - C.getStringSection(), StringRef(), &C.getAddrSection(), - C.getLineSection().Data, C.isLittleEndian(), false); + C.getStringSection(), C.getStringOffsetSection(), + &C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(), + false); } void DWARFUnitSectionBase::parseDWO(DWARFContext &C, @@ -48,19 +49,14 @@ void DWARFUnitSectionBase::parseDWO(DWARFContext &C, DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, - StringRef SS, StringRef SOS, const DWARFSection *AOS, - StringRef LS, bool LE, bool IsDWO, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry) : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS), - LineSection(LS), StringSection(SS), StringOffsetSection([&]() { - if (IndexEntry) - if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) - return SOS.slice(C->Offset, C->Offset + C->Length); - return SOS; - }()), - AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO), - UnitSection(UnitSection), IndexEntry(IndexEntry) { + LineSection(LS), StringSection(SS), StringOffsetSection(SOS), + StringOffsetSectionBase(0), AddrOffsetSection(AOS), isLittleEndian(LE), + isDWO(IsDWO), UnitSection(UnitSection), IndexEntry(IndexEntry) { clear(); } @@ -77,17 +73,25 @@ bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index, } bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index, - uint32_t &Result) const { - // FIXME: string offset section entries are 8-byte for DWARF64. - const uint32_t ItemSize = 4; - uint32_t Offset = Index * ItemSize; - if (StringOffsetSection.size() < Offset + ItemSize) + uint64_t &Result) const { + unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4; + uint32_t Offset = StringOffsetSectionBase + Index * ItemSize; + if (StringOffsetSection.Data.size() < Offset + ItemSize) return false; - DataExtractor DA(StringOffsetSection, isLittleEndian, 0); - Result = DA.getU32(&Offset); + DataExtractor DA(StringOffsetSection.Data, isLittleEndian, 0); + Result = ItemSize == 4 ? DA.getU32(&Offset) : DA.getU64(&Offset); return true; } +uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const { + unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4; + uint64_t ByteOffset = StringOffsetSectionBase + Index * ItemSize; + RelocAddrMap::const_iterator AI = getStringOffsetsRelocMap().find(ByteOffset); + if (AI != getStringOffsetsRelocMap().end()) + return AI->second.Value; + return 0; +} + bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { Length = debug_info.getU32(offset_ptr); Version = debug_info.getU16(offset_ptr); @@ -119,6 +123,9 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { if (!LengthOK || !VersionOK || !AddrSizeOK) return false; + // Keep track of the highest DWARF version we encounter across all units. + Context.setMaxVersionIfGreater(Version); + Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset); return Abbrevs != nullptr; } @@ -242,6 +249,17 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { setBaseAddress(*BaseAddr); AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0); RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); + + // In general, we derive the offset of the unit's contibution to the + // debug_str_offsets{.dwo} section from the unit DIE's + // DW_AT_str_offsets_base attribute. In dwp files we add to it the offset + // we get from the index table. + StringOffsetSectionBase = + toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0); + if (IndexEntry) + if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) + StringOffsetSectionBase += C->Offset; + // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for // skeleton CU DIE, so that DWARF users not aware of it are not broken. } diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 0981a4dfdfa5..59b3d0ca55a6 100644 --- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/DebugInfo/DWARF/LLVMBuild.txt b/lib/DebugInfo/DWARF/LLVMBuild.txt index 9f8b1047ef6b..8242a7f2e7f7 100644 --- a/lib/DebugInfo/DWARF/LLVMBuild.txt +++ b/lib/DebugInfo/DWARF/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = DebugInfoDWARF parent = DebugInfo -required_libraries = Object Support +required_libraries = BinaryFormat Object Support diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp index cae817c1b367..f62c4991fe33 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h" +#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp index 4741d9c9a849..796ce214b383 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h" #include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp index ccf8c4e622cc..b9311d060128 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h" #include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp index 3c211b569044..266638530c2f 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h" #include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIASession.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index 4e2474c51cb1..0b48a366bd24 100644 --- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -372,8 +372,11 @@ DIARawSymbol::findChildren(PDB_SymType Type) const { enum SymTagEnum EnumVal = static_cast(Type); CComPtr DiaEnumerator; - if (S_OK != Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) - return nullptr; + if (S_OK != + Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) { + if (S_OK != Symbol->findChildren(EnumVal, nullptr, nsNone, &DiaEnumerator)) + return nullptr; + } return llvm::make_unique(Session, DiaEnumerator); } diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 22c2ef31bd71..396dffaa68b1 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MSFCommon.h" @@ -19,7 +20,6 @@ #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/COFF.h" using namespace llvm; using namespace llvm::codeview; @@ -38,12 +38,12 @@ template <> struct BinaryItemTraits { static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, uint32_t C13Size) { - uint32_t Size = sizeof(uint32_t); // Signature - Size += SymbolByteSize; // Symbol Data - Size += 0; // TODO: Layout.C11Bytes - Size += C13Size; // C13 Debug Info Size - Size += sizeof(uint32_t); // GlobalRefs substream size (always 0) - Size += 0; // GlobalRefs substream bytes + uint32_t Size = sizeof(uint32_t); // Signature + Size += alignTo(SymbolByteSize, 4); // Symbol Data + Size += 0; // TODO: Layout.C11Bytes + Size += C13Size; // C13 Debug Info Size + Size += sizeof(uint32_t); // GlobalRefs substream size (always 0) + Size += 0; // GlobalRefs substream bytes return Size; } @@ -156,6 +156,8 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, BinaryStreamRef RecordsRef(Records); if (auto EC = SymbolWriter.writeStreamRef(RecordsRef)) return EC; + if (auto EC = SymbolWriter.padToAlignment(4)) + return EC; // TODO: Write C11 Line data assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 && "Invalid debug section alignment!"); diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index 320b11dc5cab..24322d942fac 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -216,10 +216,12 @@ FixedStreamArray DbiStream::getSectionMap() const { void DbiStream::visitSectionContributions( ISectionContribVisitor &Visitor) const { - if (SectionContribVersion == DbiSecContribVer60) { + if (!SectionContribs.empty()) { + assert(SectionContribVersion == DbiSecContribVer60); for (auto &SC : SectionContribs) Visitor.visit(SC); - } else if (SectionContribVersion == DbiSecContribV2) { + } else if (!SectionContribs2.empty()) { + assert(SectionContribVersion == DbiSecContribV2); for (auto &SC : SectionContribs2) Visitor.visit(SC); } diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index 55c20fdb9af6..355c7b57f4d1 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" @@ -17,7 +18,6 @@ #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Object/COFF.h" #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/COFF.h" using namespace llvm; using namespace llvm::codeview; diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp index c4ff30011a17..4186f2eb6ba0 100644 --- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp +++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp @@ -90,14 +90,14 @@ Error ModuleDebugStreamRef::commit() { return Error::success(); } Expected ModuleDebugStreamRef::findChecksumsSubsection() const { + codeview::DebugChecksumsSubsectionRef Result; for (const auto &SS : subsections()) { if (SS.kind() != DebugSubsectionKind::FileChecksums) continue; - codeview::DebugChecksumsSubsectionRef Result; if (auto EC = Result.initialize(SS.getRecordData())) return std::move(EC); return Result; } - return make_error(raw_error_code::no_entry); + return Result; } diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 58202577672a..091ac67035dc 100644 --- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -105,10 +105,12 @@ Error PublicsStream::reload() { "Could not read a thunk map.")); // Something called "section map" follows. - if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Could not read a section map.")); + if (Reader.bytesRemaining() > 0) { + if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) + return joinErrors(std::move(EC), + make_error(raw_error_code::corrupt_file, + "Could not read a section map.")); + } if (Reader.bytesRemaining() > 0) return make_error(raw_error_code::corrupt_file, diff --git a/lib/DebugInfo/PDB/PDBContext.cpp b/lib/DebugInfo/PDB/PDBContext.cpp index f6b6b951ebe1..df0feac2bc40 100644 --- a/lib/DebugInfo/PDB/PDBContext.cpp +++ b/lib/DebugInfo/PDB/PDBContext.cpp @@ -12,8 +12,8 @@ #include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" +#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" #include "llvm/Object/COFF.h" diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp index 7385d3ba1489..7076b4aec347 100644 --- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolBlock.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp index e08450e0ad0c..f73cd36d057a 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp index 2f1c43666ae5..df696fa8c5f2 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp index 9ec20bb62d75..a7b69a755941 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCustom.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index 0734a1f8314a..5a5cb4c1b5ca 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -12,10 +12,10 @@ #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" -#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp index 482c95e3a850..4a4195beb4ea 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp index ae23c7619e2a..a448a404dc4a 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp index 87bb4044216b..dbec16fcbaac 100644 --- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp index 0ee18d471624..0fdf8b6d0f77 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp index f617d8d0c2df..726e7e1cdbb4 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp index 68ba87c1cdf8..6c84b984d210 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp index ec27985e91d1..c01877287888 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 473529d1b043..0304c6286c8f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -12,9 +12,9 @@ #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h" -#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp index 86e0ec4f8565..7cfba823b4fa 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp index a516a4d2c429..ddc0574617c5 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp index dbbea9c93e20..fdbe845f455a 100644 --- a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp index 020aec9e98a8..f40578f4372a 100644 --- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index f672680cb9ea..2a89faff9647 100644 --- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -15,12 +15,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolSize.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Error.h" diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index 9de3ddc039d6..19711ca58c6f 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -16,6 +16,7 @@ #include "SymbolizableObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/PDB/PDB.h" @@ -24,7 +25,6 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" @@ -39,6 +39,8 @@ #if defined(_MSC_VER) #include + +// This must be included after windows.h. #include #pragma comment(lib, "dbghelp.lib") diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 0051c69efb7d..a7b1fe206f10 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "IntelJITEventsWrapper.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/ExecutionEngine/JITEventListener.h" diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c index e9668892c05b..f2d36a76a315 100644 --- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c @@ -22,8 +22,8 @@ #include #pragma optimize("", off) #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include #include +#include #include #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #include diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index ee75bee9c533..64dca930722e 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -22,7 +22,7 @@ #include "Interpreter.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Config/config.h" // Detect libffi +#include "llvm/Config/config.h" // Detect libffi #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -33,8 +33,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/UniqueLock.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 57b5d85bb550..3581d6458395 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 711b887da6ef..e3a456849f90 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index a27573f93b97..7dd6b17d33cb 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -20,11 +20,11 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" #include "llvm/Object/Archive.h" @@ -34,10 +34,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include #include #include #include -#include #include #include #include diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index ab86e5d6a0fd..2b69f1a0269f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "RuntimeDyldCheckerImpl.h" #include "RuntimeDyldCOFF.h" +#include "RuntimeDyldCheckerImpl.h" #include "RuntimeDyldELF.h" #include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MutexGuard.h" diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 9ce3974529bb..3d12eadea4dd 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -18,10 +18,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 18c23c5a2a5d..5268bc5a1868 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -28,8 +28,8 @@ #include "llvm/Support/Mutex.h" #include "llvm/Support/SwapByteOrder.h" #include -#include #include +#include using namespace llvm; using namespace llvm::object; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h index 6aa1a2bdb926..901f77865ba1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h index 318afa21a88b..3e4b0c8f75bb 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index 26e73989d7ed..7cbb43854151 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp index cae4d69789a2..926996d6f7b3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "RuntimeDyldELFMips.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" #define DEBUG_TYPE "dyld" diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp index 50478eac6827..8904475f084f 100644 --- a/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/Config/config.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Process.h" diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index e93c79cfcec6..9aad3771784d 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -10,9 +10,9 @@ //===----------------------------------------------------------------------===// #include "FuzzerCorpus.h" +#include "FuzzerIO.h" #include "FuzzerInterface.h" #include "FuzzerInternal.h" -#include "FuzzerIO.h" #include "FuzzerMutate.h" #include "FuzzerRandom.h" #include "FuzzerShmem.h" @@ -149,7 +149,7 @@ static bool ParseOneFlag(const char *Param) { int Val = MyStol(Str); *FlagDescriptions[F].IntFlag = Val; if (Flags.verbosity >= 2) - Printf("Flag: %s %d\n", Name, Val);; + Printf("Flag: %s %d\n", Name, Val); return true; } else if (FlagDescriptions[F].UIntFlag) { unsigned int Val = std::stoul(Str); diff --git a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp index 77521698c80a..321b3ec5d414 100644 --- a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp +++ b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp @@ -14,6 +14,8 @@ #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" #include "Windows.h" + +// This must be included after Windows.h. #include "Psapi.h" namespace fuzzer { diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 14caa203c5ef..f6083282ab61 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -10,8 +10,8 @@ //===----------------------------------------------------------------------===// #include "FuzzerCorpus.h" -#include "FuzzerInternal.h" #include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerMutate.h" #include "FuzzerRandom.h" #include "FuzzerShmem.h" diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp index e66460c29e2f..612f4bbb28f2 100644 --- a/lib/Fuzzer/FuzzerMerge.cpp +++ b/lib/Fuzzer/FuzzerMerge.cpp @@ -9,9 +9,9 @@ // Merging corpora. //===----------------------------------------------------------------------===// -#include "FuzzerInternal.h" -#include "FuzzerIO.h" #include "FuzzerMerge.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerTracePC.h" #include "FuzzerUtil.h" diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp index e60d4130de10..53cb9027e455 100644 --- a/lib/Fuzzer/FuzzerMutate.cpp +++ b/lib/Fuzzer/FuzzerMutate.cpp @@ -9,11 +9,11 @@ // Mutate a test input. //===----------------------------------------------------------------------===// +#include "FuzzerMutate.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" -#include "FuzzerMutate.h" #include "FuzzerOptions.h" namespace fuzzer { diff --git a/lib/Fuzzer/FuzzerShmemPosix.cpp b/lib/Fuzzer/FuzzerShmemPosix.cpp index 2723bdd86f48..50cdcfb509dc 100644 --- a/lib/Fuzzer/FuzzerShmemPosix.cpp +++ b/lib/Fuzzer/FuzzerShmemPosix.cpp @@ -14,14 +14,14 @@ #include "FuzzerIO.h" #include "FuzzerShmem.h" -#include -#include #include #include -#include #include #include #include +#include +#include +#include #include namespace fuzzer { diff --git a/lib/Fuzzer/FuzzerShmemWindows.cpp b/lib/Fuzzer/FuzzerShmemWindows.cpp index 6325b4b8e5b4..d330ebf4fd07 100644 --- a/lib/Fuzzer/FuzzerShmemWindows.cpp +++ b/lib/Fuzzer/FuzzerShmemWindows.cpp @@ -14,10 +14,10 @@ #include "FuzzerIO.h" #include "FuzzerShmem.h" -#include -#include #include #include +#include +#include namespace fuzzer { diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp index ce0f7a47eee6..ea93468ea0ed 100644 --- a/lib/Fuzzer/FuzzerTracePC.cpp +++ b/lib/Fuzzer/FuzzerTracePC.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "FuzzerTracePC.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" #include "FuzzerDictionary.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" -#include "FuzzerTracePC.h" #include "FuzzerUtil.h" #include "FuzzerValueBitMap.h" #include diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp index a486223d650c..8670e2ad6727 100644 --- a/lib/Fuzzer/FuzzerTraceState.cpp +++ b/lib/Fuzzer/FuzzerTraceState.cpp @@ -10,8 +10,8 @@ //===----------------------------------------------------------------------===// #include "FuzzerDictionary.h" -#include "FuzzerInternal.h" #include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerMutate.h" #include "FuzzerTracePC.h" #include diff --git a/lib/Fuzzer/FuzzerUtilWindows.cpp b/lib/Fuzzer/FuzzerUtilWindows.cpp index 08bb3cf3be15..25ac976fc2db 100644 --- a/lib/Fuzzer/FuzzerUtilWindows.cpp +++ b/lib/Fuzzer/FuzzerUtilWindows.cpp @@ -22,6 +22,8 @@ #include #include #include + +// This must be included after windows.h. #include namespace fuzzer { diff --git a/lib/Fuzzer/afl/afl_driver.cpp b/lib/Fuzzer/afl/afl_driver.cpp index 3815ed11cf60..d0521bdfdd67 100644 --- a/lib/Fuzzer/afl/afl_driver.cpp +++ b/lib/Fuzzer/afl/afl_driver.cpp @@ -12,8 +12,8 @@ Usage: ################################################################################ cat << EOF > test_fuzzer.cc -#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size > 0 && data[0] == 'H') if (size > 1 && data[1] == 'I') @@ -50,18 +50,18 @@ statistics from the file. If that fails then the process will quit. */ #include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include #include #include +#include -#include #include +#include #include // Platform detection. Copied from FuzzerInternal.h diff --git a/lib/Fuzzer/test/AFLDriverTest.cpp b/lib/Fuzzer/test/AFLDriverTest.cpp index e3f5f7100883..b949adc7de15 100644 --- a/lib/Fuzzer/test/AFLDriverTest.cpp +++ b/lib/Fuzzer/test/AFLDriverTest.cpp @@ -3,8 +3,8 @@ // Contains dummy functions used to avoid dependency on AFL. #include -#include #include +#include extern "C" void __afl_manual_init() {} diff --git a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp index 69b0d59fb8ef..dfb6007b7970 100644 --- a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp +++ b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // abs(x) < 0 and y == Const puzzle, 64-bit variant. -#include -#include -#include #include +#include #include +#include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size < 16) return 0; diff --git a/lib/Fuzzer/test/AbsNegAndConstantTest.cpp b/lib/Fuzzer/test/AbsNegAndConstantTest.cpp index 69075a454c99..e9d983ff1ebf 100644 --- a/lib/Fuzzer/test/AbsNegAndConstantTest.cpp +++ b/lib/Fuzzer/test/AbsNegAndConstantTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // abs(x) < 0 and y == Const puzzle. -#include -#include -#include #include +#include #include +#include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size < 8) return 0; diff --git a/lib/Fuzzer/test/AccumulateAllocationsTest.cpp b/lib/Fuzzer/test/AccumulateAllocationsTest.cpp index 604d8fa299ae..e9acd7ccbd30 100644 --- a/lib/Fuzzer/test/AccumulateAllocationsTest.cpp +++ b/lib/Fuzzer/test/AccumulateAllocationsTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Test with a more mallocs than frees, but no leak. -#include #include +#include const int kAllocatedPointersSize = 10000; int NumAllocatedPointers = 0; diff --git a/lib/Fuzzer/test/BadStrcmpTest.cpp b/lib/Fuzzer/test/BadStrcmpTest.cpp index 159cd7ea5f70..ba2b068f741d 100644 --- a/lib/Fuzzer/test/BadStrcmpTest.cpp +++ b/lib/Fuzzer/test/BadStrcmpTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Test that we don't creash in case of bad strcmp params. +#include #include #include -#include static volatile int Sink; diff --git a/lib/Fuzzer/test/BufferOverflowOnInput.cpp b/lib/Fuzzer/test/BufferOverflowOnInput.cpp index b9d14052aee4..75e1fb90a19a 100644 --- a/lib/Fuzzer/test/BufferOverflowOnInput.cpp +++ b/lib/Fuzzer/test/BufferOverflowOnInput.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find the string "Hi!". #include +#include #include #include -#include #include static volatile bool SeedLargeBuffer; diff --git a/lib/Fuzzer/test/CallerCalleeTest.cpp b/lib/Fuzzer/test/CallerCalleeTest.cpp index 3ec025d02301..ed9f37cc1521 100644 --- a/lib/Fuzzer/test/CallerCalleeTest.cpp +++ b/lib/Fuzzer/test/CallerCalleeTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. // Try to find the target using the indirect caller-callee pairs. +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/CleanseTest.cpp b/lib/Fuzzer/test/CleanseTest.cpp index faea8dcb3c30..ee1845701269 100644 --- a/lib/Fuzzer/test/CleanseTest.cpp +++ b/lib/Fuzzer/test/CleanseTest.cpp @@ -3,9 +3,9 @@ // Test the the fuzzer is able to 'cleanse' the reproducer // by replacing all irrelevant bytes with garbage. +#include #include #include -#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size >= 20 && Data[1] == '1' && Data[5] == '5' && Data[10] == 'A' && diff --git a/lib/Fuzzer/test/CustomMutatorTest.cpp b/lib/Fuzzer/test/CustomMutatorTest.cpp index 4f84519a90e6..521d7f506b4d 100644 --- a/lib/Fuzzer/test/CustomMutatorTest.cpp +++ b/lib/Fuzzer/test/CustomMutatorTest.cpp @@ -3,9 +3,9 @@ // Simple test for a cutom mutator. #include +#include #include #include -#include #include #include "FuzzerInterface.h" diff --git a/lib/Fuzzer/test/CxxStringEqTest.cpp b/lib/Fuzzer/test/CxxStringEqTest.cpp index e0e23c972ccb..924851c5ad53 100644 --- a/lib/Fuzzer/test/CxxStringEqTest.cpp +++ b/lib/Fuzzer/test/CxxStringEqTest.cpp @@ -3,11 +3,11 @@ // Simple test for a fuzzer. Must find a specific string // used in std::string operator ==. +#include #include #include -#include -#include #include +#include static volatile int Sink; diff --git a/lib/Fuzzer/test/DSOTestMain.cpp b/lib/Fuzzer/test/DSOTestMain.cpp index 3e225d886128..e0c857d4fdec 100644 --- a/lib/Fuzzer/test/DSOTestMain.cpp +++ b/lib/Fuzzer/test/DSOTestMain.cpp @@ -4,9 +4,9 @@ // Source code for a simple DSO. #include +#include #include #include -#include extern int DSO1(int a); extern int DSO2(int a); extern int DSOTestExtra(int a); diff --git a/lib/Fuzzer/test/DivTest.cpp b/lib/Fuzzer/test/DivTest.cpp index 63f6960f4e90..bce13feb790f 100644 --- a/lib/Fuzzer/test/DivTest.cpp +++ b/lib/Fuzzer/test/DivTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer: find the interesting argument for div. #include +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp index 62b3be76e3aa..bbf5ea235c7a 100644 --- a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp +++ b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "FUZZ". +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/lib/Fuzzer/test/FullCoverageSetTest.cpp b/lib/Fuzzer/test/FullCoverageSetTest.cpp index 415e0b4760c5..6d7e48fe51f8 100644 --- a/lib/Fuzzer/test/FullCoverageSetTest.cpp +++ b/lib/Fuzzer/test/FullCoverageSetTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "FUZZER". +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 78ea874f2ce2..c8beb4331bfa 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -6,12 +6,12 @@ #define _LIBCPP_HAS_NO_ASAN #include "FuzzerCorpus.h" -#include "FuzzerInternal.h" #include "FuzzerDictionary.h" +#include "FuzzerInternal.h" #include "FuzzerMerge.h" #include "FuzzerMutate.h" -#include "FuzzerTracePC.h" #include "FuzzerRandom.h" +#include "FuzzerTracePC.h" #include "gtest/gtest.h" #include #include diff --git a/lib/Fuzzer/test/LeakTest.cpp b/lib/Fuzzer/test/LeakTest.cpp index 22e5164050e5..ea89e3901057 100644 --- a/lib/Fuzzer/test/LeakTest.cpp +++ b/lib/Fuzzer/test/LeakTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Test with a leak. -#include #include +#include static volatile void *Sink; diff --git a/lib/Fuzzer/test/LeakTimeoutTest.cpp b/lib/Fuzzer/test/LeakTimeoutTest.cpp index 4f31b3e52c16..92526194a508 100644 --- a/lib/Fuzzer/test/LeakTimeoutTest.cpp +++ b/lib/Fuzzer/test/LeakTimeoutTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Test with a leak. -#include #include +#include static volatile int *Sink; diff --git a/lib/Fuzzer/test/LoadTest.cpp b/lib/Fuzzer/test/LoadTest.cpp index eef16c7be51e..67a28c7cb22f 100644 --- a/lib/Fuzzer/test/LoadTest.cpp +++ b/lib/Fuzzer/test/LoadTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer: find interesting value of array index. #include +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/Memcmp64BytesTest.cpp b/lib/Fuzzer/test/Memcmp64BytesTest.cpp index e81526b578a3..5b6cb707173f 100644 --- a/lib/Fuzzer/test/Memcmp64BytesTest.cpp +++ b/lib/Fuzzer/test/Memcmp64BytesTest.cpp @@ -3,10 +3,10 @@ // Simple test for a fuzzer. The fuzzer must find a particular string. #include -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { const char kString64Bytes[] = diff --git a/lib/Fuzzer/test/MemcmpTest.cpp b/lib/Fuzzer/test/MemcmpTest.cpp index fdbf94683f76..8dbb7d84fbba 100644 --- a/lib/Fuzzer/test/MemcmpTest.cpp +++ b/lib/Fuzzer/test/MemcmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { // TODO: check other sizes. diff --git a/lib/Fuzzer/test/NotinstrumentedTest.cpp b/lib/Fuzzer/test/NotinstrumentedTest.cpp index ffe952c749d2..91418990b192 100644 --- a/lib/Fuzzer/test/NotinstrumentedTest.cpp +++ b/lib/Fuzzer/test/NotinstrumentedTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // This test should not be instrumented. -#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return 0; diff --git a/lib/Fuzzer/test/NthRunCrashTest.cpp b/lib/Fuzzer/test/NthRunCrashTest.cpp index b43e69e51b25..da5fbd33e962 100644 --- a/lib/Fuzzer/test/NthRunCrashTest.cpp +++ b/lib/Fuzzer/test/NthRunCrashTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Crash on the N-th execution. -#include #include +#include #include static int Counter; diff --git a/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp b/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp index 153710920a5f..459db51f8a3b 100644 --- a/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp +++ b/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the empty string. +#include #include #include -#include #include static volatile int *Null = 0; diff --git a/lib/Fuzzer/test/NullDerefTest.cpp b/lib/Fuzzer/test/NullDerefTest.cpp index 3f03d2498197..1b44b682ace6 100644 --- a/lib/Fuzzer/test/NullDerefTest.cpp +++ b/lib/Fuzzer/test/NullDerefTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "Hi!". +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/OneHugeAllocTest.cpp b/lib/Fuzzer/test/OneHugeAllocTest.cpp index 8d3d1d6d302d..32a557871000 100644 --- a/lib/Fuzzer/test/OneHugeAllocTest.cpp +++ b/lib/Fuzzer/test/OneHugeAllocTest.cpp @@ -3,9 +3,9 @@ // Tests OOM handling when there is a single large allocation. #include +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp index 316b7682b8e6..a07795a08dff 100644 --- a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp +++ b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp @@ -3,9 +3,9 @@ // Tests OOM handling. #include +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/OutOfMemoryTest.cpp b/lib/Fuzzer/test/OutOfMemoryTest.cpp index 078a39ee1fe9..5e59bde09853 100644 --- a/lib/Fuzzer/test/OutOfMemoryTest.cpp +++ b/lib/Fuzzer/test/OutOfMemoryTest.cpp @@ -3,9 +3,9 @@ // Tests OOM handling. #include +#include #include #include -#include #include #include #include diff --git a/lib/Fuzzer/test/RepeatedBytesTest.cpp b/lib/Fuzzer/test/RepeatedBytesTest.cpp index 2fa6c78c26d8..14222f284747 100644 --- a/lib/Fuzzer/test/RepeatedBytesTest.cpp +++ b/lib/Fuzzer/test/RepeatedBytesTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find repeated bytes. #include +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/lib/Fuzzer/test/RepeatedMemcmp.cpp b/lib/Fuzzer/test/RepeatedMemcmp.cpp index 7377f65ed76d..18369deac3b0 100644 --- a/lib/Fuzzer/test/RepeatedMemcmp.cpp +++ b/lib/Fuzzer/test/RepeatedMemcmp.cpp @@ -1,11 +1,10 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. - -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int Matches1 = 0; diff --git a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp index 0fd7c5e9a1fb..d09542963626 100644 --- a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp +++ b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Test that we can find the minimal item in the corpus (3 bytes: "FUZ"). -#include -#include #include -#include +#include #include +#include +#include static volatile int Sink; diff --git a/lib/Fuzzer/test/ShrinkValueProfileTest.cpp b/lib/Fuzzer/test/ShrinkValueProfileTest.cpp index 026b8ce26591..86e4e3cb0d9a 100644 --- a/lib/Fuzzer/test/ShrinkValueProfileTest.cpp +++ b/lib/Fuzzer/test/ShrinkValueProfileTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Test that we can find the minimal item in the corpus (3 bytes: "FUZ"). -#include -#include #include -#include +#include #include +#include +#include static volatile uint32_t Sink; diff --git a/lib/Fuzzer/test/SignedIntOverflowTest.cpp b/lib/Fuzzer/test/SignedIntOverflowTest.cpp index 7df32ad57933..d80060207dee 100644 --- a/lib/Fuzzer/test/SignedIntOverflowTest.cpp +++ b/lib/Fuzzer/test/SignedIntOverflowTest.cpp @@ -3,11 +3,11 @@ // Test for signed-integer-overflow. #include +#include +#include #include #include -#include #include -#include static volatile int Sink; static int Large = INT_MAX; diff --git a/lib/Fuzzer/test/SimpleCmpTest.cpp b/lib/Fuzzer/test/SimpleCmpTest.cpp index 12b5cdda0660..8acad4ac77e8 100644 --- a/lib/Fuzzer/test/SimpleCmpTest.cpp +++ b/lib/Fuzzer/test/SimpleCmpTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find several narrow ranges. #include +#include #include #include -#include extern int AllLines[]; diff --git a/lib/Fuzzer/test/SimpleDictionaryTest.cpp b/lib/Fuzzer/test/SimpleDictionaryTest.cpp index cd7292bd006c..a1cd20047224 100644 --- a/lib/Fuzzer/test/SimpleDictionaryTest.cpp +++ b/lib/Fuzzer/test/SimpleDictionaryTest.cpp @@ -5,9 +5,9 @@ // The fuzzer must find a string based on dictionary words: // "Elvis" // "Presley" +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/SimpleHashTest.cpp b/lib/Fuzzer/test/SimpleHashTest.cpp index 00599de78ebe..99e96cb25dcd 100644 --- a/lib/Fuzzer/test/SimpleHashTest.cpp +++ b/lib/Fuzzer/test/SimpleHashTest.cpp @@ -5,9 +5,9 @@ // and then compares the last 4 bytes with the computed value. // A fuzzer with cmp traces is expected to defeat this check. #include +#include #include #include -#include // A modified jenkins_one_at_a_time_hash initialized by non-zero, // so that simple_hash(0) != 0. See also diff --git a/lib/Fuzzer/test/SimpleTest.cpp b/lib/Fuzzer/test/SimpleTest.cpp index e53ea160ed8f..a8b4988dff10 100644 --- a/lib/Fuzzer/test/SimpleTest.cpp +++ b/lib/Fuzzer/test/SimpleTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find the string "Hi!". #include +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/SimpleThreadedTest.cpp b/lib/Fuzzer/test/SimpleThreadedTest.cpp index 5f02d3f8457c..1abdc3fc6d6b 100644 --- a/lib/Fuzzer/test/SimpleThreadedTest.cpp +++ b/lib/Fuzzer/test/SimpleThreadedTest.cpp @@ -3,8 +3,8 @@ // Threaded test for a fuzzer. The fuzzer should find "H" #include -#include #include +#include #include #include #include diff --git a/lib/Fuzzer/test/SingleByteInputTest.cpp b/lib/Fuzzer/test/SingleByteInputTest.cpp index 4ce819d230ce..72b58ba912eb 100644 --- a/lib/Fuzzer/test/SingleByteInputTest.cpp +++ b/lib/Fuzzer/test/SingleByteInputTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer, need just one byte to crash. -#include -#include #include +#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size > 0 && Data[Size/2] == 42) { diff --git a/lib/Fuzzer/test/SingleMemcmpTest.cpp b/lib/Fuzzer/test/SingleMemcmpTest.cpp index c73f68a7ee6e..83c09e0428ec 100644 --- a/lib/Fuzzer/test/SingleMemcmpTest.cpp +++ b/lib/Fuzzer/test/SingleMemcmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { char *S = (char*)Data; diff --git a/lib/Fuzzer/test/SingleStrcmpTest.cpp b/lib/Fuzzer/test/SingleStrcmpTest.cpp index 48f481dfc51a..149073444c9c 100644 --- a/lib/Fuzzer/test/SingleStrcmpTest.cpp +++ b/lib/Fuzzer/test/SingleStrcmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size >= 7) { diff --git a/lib/Fuzzer/test/SingleStrncmpTest.cpp b/lib/Fuzzer/test/SingleStrncmpTest.cpp index e5601da86329..b302670fb743 100644 --- a/lib/Fuzzer/test/SingleStrncmpTest.cpp +++ b/lib/Fuzzer/test/SingleStrncmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { char *S = (char*)Data; diff --git a/lib/Fuzzer/test/SpamyTest.cpp b/lib/Fuzzer/test/SpamyTest.cpp index d294d4dc53e0..721134e1841c 100644 --- a/lib/Fuzzer/test/SpamyTest.cpp +++ b/lib/Fuzzer/test/SpamyTest.cpp @@ -3,9 +3,9 @@ // The test spams to stderr and stdout. #include +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/lib/Fuzzer/test/StrcmpTest.cpp b/lib/Fuzzer/test/StrcmpTest.cpp index cd91dda76f30..e7636e8812fc 100644 --- a/lib/Fuzzer/test/StrcmpTest.cpp +++ b/lib/Fuzzer/test/StrcmpTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Break through a series of strcmp. -#include +#include #include #include #include -#include +#include bool Eq(const uint8_t *Data, size_t Size, const char *Str) { char Buff[1024]; diff --git a/lib/Fuzzer/test/StrncmpOOBTest.cpp b/lib/Fuzzer/test/StrncmpOOBTest.cpp index f70b003afad6..4ed71d9d021d 100644 --- a/lib/Fuzzer/test/StrncmpOOBTest.cpp +++ b/lib/Fuzzer/test/StrncmpOOBTest.cpp @@ -3,10 +3,10 @@ // Test that libFuzzer itself does not read out of bounds. #include -#include -#include -#include #include +#include +#include +#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/StrncmpTest.cpp b/lib/Fuzzer/test/StrncmpTest.cpp index 5ffd011dcdff..f71f01ee3098 100644 --- a/lib/Fuzzer/test/StrncmpTest.cpp +++ b/lib/Fuzzer/test/StrncmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include static volatile int sink; diff --git a/lib/Fuzzer/test/StrstrTest.cpp b/lib/Fuzzer/test/StrstrTest.cpp index f021e75ec0fd..a3ea4e03b3d2 100644 --- a/lib/Fuzzer/test/StrstrTest.cpp +++ b/lib/Fuzzer/test/StrstrTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Test strstr and strcasestr hooks. -#include -#include #include #include #include +#include +#include // Windows does not have strcasestr and memmem, so we are not testing them. #ifdef _WIN32 diff --git a/lib/Fuzzer/test/SwapCmpTest.cpp b/lib/Fuzzer/test/SwapCmpTest.cpp index b90ac72c22c4..bbfbefe6ab71 100644 --- a/lib/Fuzzer/test/SwapCmpTest.cpp +++ b/lib/Fuzzer/test/SwapCmpTest.cpp @@ -3,9 +3,9 @@ // The fuzzer must find several constants with swapped bytes. #include +#include #include #include -#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size < 14) return 0; diff --git a/lib/Fuzzer/test/Switch2Test.cpp b/lib/Fuzzer/test/Switch2Test.cpp index 3c6a3004907e..5f66ac8b499e 100644 --- a/lib/Fuzzer/test/Switch2Test.cpp +++ b/lib/Fuzzer/test/Switch2Test.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the interesting switch value. -#include -#include -#include -#include #include +#include +#include +#include +#include int Switch(int a) { switch(a) { diff --git a/lib/Fuzzer/test/SwitchTest.cpp b/lib/Fuzzer/test/SwitchTest.cpp index 3dc051ff7b5b..86944cad21c5 100644 --- a/lib/Fuzzer/test/SwitchTest.cpp +++ b/lib/Fuzzer/test/SwitchTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the interesting switch value. -#include -#include -#include -#include #include +#include +#include +#include +#include static volatile int Sink; diff --git a/lib/Fuzzer/test/TableLookupTest.cpp b/lib/Fuzzer/test/TableLookupTest.cpp index f9d5610820ff..8126eeabaf42 100644 --- a/lib/Fuzzer/test/TableLookupTest.cpp +++ b/lib/Fuzzer/test/TableLookupTest.cpp @@ -3,11 +3,11 @@ // Make sure the fuzzer eventually finds all possible values of a variable // within a range. -#include +#include #include #include #include -#include +#include #include const size_t N = 1 << 12; diff --git a/lib/Fuzzer/test/ThreadedLeakTest.cpp b/lib/Fuzzer/test/ThreadedLeakTest.cpp index 751107110871..538d3b434808 100644 --- a/lib/Fuzzer/test/ThreadedLeakTest.cpp +++ b/lib/Fuzzer/test/ThreadedLeakTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // The fuzzer should find a leak in a non-main thread. -#include #include +#include #include static volatile int *Sink; diff --git a/lib/Fuzzer/test/ThreadedTest.cpp b/lib/Fuzzer/test/ThreadedTest.cpp index 09137a9a70c1..bb51ba764eba 100644 --- a/lib/Fuzzer/test/ThreadedTest.cpp +++ b/lib/Fuzzer/test/ThreadedTest.cpp @@ -3,8 +3,8 @@ // Threaded test for a fuzzer. The fuzzer should not crash. #include -#include #include +#include #include #include diff --git a/lib/Fuzzer/test/TimeoutEmptyTest.cpp b/lib/Fuzzer/test/TimeoutEmptyTest.cpp index 8066f480b655..1ddf1fa34589 100644 --- a/lib/Fuzzer/test/TimeoutEmptyTest.cpp +++ b/lib/Fuzzer/test/TimeoutEmptyTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the empty string. -#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { static volatile int Zero = 0; diff --git a/lib/Fuzzer/test/TimeoutTest.cpp b/lib/Fuzzer/test/TimeoutTest.cpp index f8107012c841..e3cdba3eec38 100644 --- a/lib/Fuzzer/test/TimeoutTest.cpp +++ b/lib/Fuzzer/test/TimeoutTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "Hi!". +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/TraceMallocTest.cpp b/lib/Fuzzer/test/TraceMallocTest.cpp index 43e6950e185f..af9975603aa1 100644 --- a/lib/Fuzzer/test/TraceMallocTest.cpp +++ b/lib/Fuzzer/test/TraceMallocTest.cpp @@ -3,9 +3,9 @@ // Tests -trace_malloc #include +#include #include #include -#include #include int *Ptr; diff --git a/lib/Fuzzer/test/TwoDifferentBugsTest.cpp b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp index 42c0d192ba86..77d2cb1a25f9 100644 --- a/lib/Fuzzer/test/TwoDifferentBugsTest.cpp +++ b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. This test may trigger two different bugs. +#include #include #include -#include #include static volatile int *Null = 0; diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index ec4663018bd4..556e122ff82f 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" @@ -39,7 +40,6 @@ #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index a76c944f0005..a518f7b5c81a 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -13,17 +13,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Attributes.h" #include "AttributeImpl.h" #include "LLVMContextImpl.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp index e27ecad0a884..c735f9b2eb1e 100644 --- a/lib/IR/Comdat.cpp +++ b/lib/IR/Comdat.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Comdat.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Comdat.h" using namespace llvm; diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index 509caba3acd4..21d1996ef851 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -21,10 +21,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instruction.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -577,9 +577,6 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { if (isFullSet()) return ConstantRange(DstTySize, /*isFullSet=*/true); - APInt MaxValue = APInt::getLowBitsSet(getBitWidth(), DstTySize); - APInt MaxBitValue = APInt::getOneBitSet(getBitWidth(), DstTySize); - APInt LowerDiv(Lower), UpperDiv(Upper); ConstantRange Union(DstTySize, /*isFullSet=*/false); @@ -587,35 +584,42 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { // We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and // then we do the union with [MaxValue, Upper) if (isWrappedSet()) { - // If Upper is greater than Max Value, it covers the whole truncated range. - if (Upper.uge(MaxValue)) + // If Upper is greater than or equal to MaxValue(DstTy), it covers the whole + // truncated range. + if (Upper.getActiveBits() > DstTySize || + Upper.countTrailingOnes() == DstTySize) return ConstantRange(DstTySize, /*isFullSet=*/true); Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize)); UpperDiv.setAllBits(); // Union covers the MaxValue case, so return if the remaining range is just - // MaxValue. + // MaxValue(DstTy). if (LowerDiv == UpperDiv) return Union; } // Chop off the most significant bits that are past the destination bitwidth. - if (LowerDiv.uge(MaxValue)) { - APInt Div(getBitWidth(), 0); - APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv); - UpperDiv -= MaxBitValue * Div; + if (LowerDiv.getActiveBits() > DstTySize) { + // Mask to just the signficant bits and subtract from LowerDiv/UpperDiv. + APInt Adjust = LowerDiv & APInt::getBitsSetFrom(getBitWidth(), DstTySize); + LowerDiv -= Adjust; + UpperDiv -= Adjust; } - if (UpperDiv.ule(MaxValue)) + unsigned UpperDivWidth = UpperDiv.getActiveBits(); + if (UpperDivWidth <= DstTySize) return ConstantRange(LowerDiv.trunc(DstTySize), UpperDiv.trunc(DstTySize)).unionWith(Union); // The truncated value wraps around. Check if we can do better than fullset. - UpperDiv -= MaxBitValue; - if (UpperDiv.ult(LowerDiv)) - return ConstantRange(LowerDiv.trunc(DstTySize), - UpperDiv.trunc(DstTySize)).unionWith(Union); + if (UpperDivWidth == DstTySize + 1) { + // Clear the MSB so that UpperDiv wraps around. + UpperDiv.clearBit(DstTySize); + if (UpperDiv.ult(LowerDiv)) + return ConstantRange(LowerDiv.trunc(DstTySize), + UpperDiv.trunc(DstTySize)).unionWith(Union); + } return ConstantRange(DstTySize, /*isFullSet=*/true); } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 8b0ff66334a7..27150a89d9b2 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -127,7 +127,7 @@ bool Constant::isOneValue() const { // Check for FP which are bitcasted from 1 integers if (const ConstantFP *CFP = dyn_cast(this)) - return CFP->getValueAPF().bitcastToAPInt() == 1; + return CFP->getValueAPF().bitcastToAPInt().isOneValue(); // Check for constant vectors which are splats of 1 values. if (const ConstantVector *CV = dyn_cast(this)) @@ -1157,21 +1157,14 @@ bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) { unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1; - if (NumBits >= 64) - return true; // always true, has to fit in largest type - uint64_t Max = (1ll << NumBits) - 1; - return Val <= Max; + return isUIntN(NumBits, Val); } bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) { unsigned NumBits = Ty->getIntegerBitWidth(); if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1 || Val == -1; - if (NumBits >= 64) - return true; // always true, has to fit in largest type - int64_t Min = -(1ll << (NumBits-1)); - int64_t Max = (1ll << (NumBits-1)) - 1; - return (Val >= Min && Val <= Max); + return isIntN(NumBits, Val); } bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) { diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 50292b6e20bf..4ff0261a7f08 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -568,6 +568,14 @@ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) { /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/ +void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr) { + int i = 0; + for (auto *T : unwrap(Tp)->subtypes()) { + Arr[i] = wrap(T); + i++; + } +} + LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { return wrap(ArrayType::get(unwrap(ElementType), ElementCount)); } @@ -587,6 +595,10 @@ LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) { return wrap(cast(Ty)->getElementType()); } +unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp) { + return unwrap(Tp)->getNumContainedTypes(); +} + unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) { return unwrap(ArrayTy)->getNumElements(); } diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 7754ac03b43d..7e598b43ac16 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DIBuilder.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" -#include "LLVMContextImpl.h" using namespace llvm; using namespace llvm::dwarf; diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index d5e29649a237..5de281a95237 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/DataLayout.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index ca3828420a72..56cec57a4d07 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/DebugInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" @@ -20,7 +21,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index 0485fece7c42..6297395b4c00 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/IntrinsicInst.h" #include "LLVMContextImpl.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/IntrinsicInst.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index e73f53f3202d..5129d6b9b008 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -12,14 +12,14 @@ // Diagnostics reporting is still done as part of the LLVMContext. //===----------------------------------------------------------------------===// -#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -32,8 +32,8 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/IR/DiagnosticPrinter.cpp b/lib/IR/DiagnosticPrinter.cpp index 659ff49d623f..ee2df9e24f93 100644 --- a/lib/IR/DiagnosticPrinter.cpp +++ b/lib/IR/DiagnosticPrinter.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/Twine.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp index 44948cc5831d..37e735251fdf 100644 --- a/lib/IR/Dominators.cpp +++ b/lib/IR/Dominators.cpp @@ -150,12 +150,6 @@ bool DominatorTree::dominates(const Instruction *Def, bool DominatorTree::dominates(const BasicBlockEdge &BBE, const BasicBlock *UseBB) const { - // Assert that we have a single edge. We could handle them by simply - // returning false, but since isSingleEdge is linear on the number of - // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge() && - "This function is not efficient in handling multiple edges"); - // If the BB the edge ends in doesn't dominate the use BB, then the // edge also doesn't. const BasicBlock *Start = BBE.getStart(); @@ -188,11 +182,17 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, // trivially dominates itself, so we only have to find if it dominates the // other predecessors. Since the only way out of X is via NormalDest, X can // only properly dominate a node if NormalDest dominates that node too. + int IsDuplicateEdge = 0; for (const_pred_iterator PI = pred_begin(End), E = pred_end(End); PI != E; ++PI) { const BasicBlock *BB = *PI; - if (BB == Start) + if (BB == Start) { + // If there are multiple edges between Start and End, by definition they + // can't dominate anything. + if (IsDuplicateEdge++) + return false; continue; + } if (!dominates(End, BB)) return false; @@ -201,12 +201,6 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, } bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { - // Assert that we have a single edge. We could handle them by simply - // returning false, but since isSingleEdge is linear on the number of - // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge() && - "This function is not efficient in handling multiple edges"); - Instruction *UserInst = cast(U.getUser()); // A PHI in the end of the edge is dominated by it. PHINode *PN = dyn_cast(UserInst); diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index fc68c0e3cad9..85a019856c01 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Function.h" #include "LLVMContextImpl.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/ValueTypes.h" @@ -29,7 +30,6 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 17d27b016cf2..afd4a36270a8 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -12,10 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "LLVMContextImpl.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" @@ -24,7 +25,6 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "LLVMContextImpl.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -293,6 +293,8 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) && + "invalid type for global variable"); setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && @@ -311,6 +313,8 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) && + "invalid type for global variable"); setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 7572d0c6b3bc..81b02946e1d5 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" @@ -134,6 +134,38 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, return CI; } +CallInst *IRBuilderBase::CreateElementAtomicMemCpy( + Value *Dst, Value *Src, Value *NumElements, uint32_t ElementSize, + MDNode *TBAATag, MDNode *TBAAStructTag, MDNode *ScopeTag, + MDNode *NoAliasTag) { + Dst = getCastedInt8PtrValue(Dst); + Src = getCastedInt8PtrValue(Src); + + Value *Ops[] = {Dst, Src, NumElements, getInt32(ElementSize)}; + Type *Tys[] = {Dst->getType(), Src->getType()}; + Module *M = BB->getParent()->getParent(); + Value *TheFn = + Intrinsic::getDeclaration(M, Intrinsic::memcpy_element_atomic, Tys); + + CallInst *CI = createCallHelper(TheFn, Ops, this); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. + if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + return CI; +} + CallInst *IRBuilderBase:: CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 6c0c5a267f81..ad22efdf0eff 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/InlineAsm.h" #include "ConstantsContext.h" #include "LLVMContextImpl.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 828e78b13005..3dd653d2d047 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "llvm/IR/Instruction.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" using namespace llvm; @@ -216,10 +216,10 @@ void Instruction::copyFastMathFlags(const Instruction *I) { copyFastMathFlags(I->getFastMathFlags()); } -void Instruction::copyIRFlags(const Value *V) { +void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) { // Copy the wrapping flags. - if (auto *OB = dyn_cast(V)) { - if (isa(this)) { + if (IncludeWrapFlags && isa(this)) { + if (auto *OB = dyn_cast(V)) { setHasNoSignedWrap(OB->hasNoSignedWrap()); setHasNoUnsignedWrap(OB->hasNoUnsignedWrap()); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 46c27331ff95..023a0b178a14 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Instructions.h" #include "LLVMContextImpl.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" @@ -26,7 +27,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -63,7 +63,7 @@ unsigned TerminatorInst::getNumSuccessors() const { switch (getOpcode()) { #define HANDLE_TERM_INST(N, OPC, CLASS) \ case Instruction::OPC: \ - return static_cast(this)->getNumSuccessorsV(); + return static_cast(this)->getNumSuccessors(); #include "llvm/IR/Instruction.def" default: break; @@ -75,7 +75,7 @@ BasicBlock *TerminatorInst::getSuccessor(unsigned idx) const { switch (getOpcode()) { #define HANDLE_TERM_INST(N, OPC, CLASS) \ case Instruction::OPC: \ - return static_cast(this)->getSuccessorV(idx); + return static_cast(this)->getSuccessor(idx); #include "llvm/IR/Instruction.def" default: break; @@ -87,7 +87,7 @@ void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) { switch (getOpcode()) { #define HANDLE_TERM_INST(N, OPC, CLASS) \ case Instruction::OPC: \ - return static_cast(this)->setSuccessorV(idx, B); + return static_cast(this)->setSuccessor(idx, B); #include "llvm/IR/Instruction.def" default: break; @@ -747,18 +747,6 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, return NewII; } -BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} - -unsigned InvokeInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) { - return setSuccessor(idx, B); -} - Value *InvokeInst::getReturnedArgOperand() const { unsigned Index; @@ -902,20 +890,6 @@ ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd) OperandTraits::op_end(this), 0, InsertAtEnd) { } -unsigned ReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to -/// emit the vtable for the class in this translation unit. -void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("ReturnInst has no successors!"); -} - -BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("ReturnInst has no successors!"); -} - //===----------------------------------------------------------------------===// // ResumeInst Implementation //===----------------------------------------------------------------------===// @@ -938,18 +912,6 @@ ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd) Op<0>() = Exn; } -unsigned ResumeInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("ResumeInst has no successors!"); -} - -BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("ResumeInst has no successors!"); -} - //===----------------------------------------------------------------------===// // CleanupReturnInst Implementation //===----------------------------------------------------------------------===// @@ -992,20 +954,6 @@ CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, init(CleanupPad, UnwindBB); } -BasicBlock *CleanupReturnInst::getSuccessorV(unsigned Idx) const { - assert(Idx == 0); - return getUnwindDest(); -} - -unsigned CleanupReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void CleanupReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { - assert(Idx == 0); - setUnwindDest(B); -} - //===----------------------------------------------------------------------===// // CatchReturnInst Implementation //===----------------------------------------------------------------------===// @@ -1037,20 +985,6 @@ CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, init(CatchPad, BB); } -BasicBlock *CatchReturnInst::getSuccessorV(unsigned Idx) const { - assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); - return getSuccessor(); -} - -unsigned CatchReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void CatchReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { - assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); - setSuccessor(B); -} - //===----------------------------------------------------------------------===// // CatchSwitchInst Implementation //===----------------------------------------------------------------------===// @@ -1134,18 +1068,6 @@ void CatchSwitchInst::removeHandler(handler_iterator HI) { setNumHungOffUseOperands(getNumOperands() - 1); } -BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} - -unsigned CatchSwitchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void CatchSwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // FuncletPadInst Implementation //===----------------------------------------------------------------------===// @@ -1198,18 +1120,6 @@ UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd) nullptr, 0, InsertAtEnd) { } -unsigned UnreachableInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("UnreachableInst has no successors!"); -} - -BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("UnreachableInst has no successors!"); -} - //===----------------------------------------------------------------------===// // BranchInst Implementation //===----------------------------------------------------------------------===// @@ -1285,18 +1195,6 @@ void BranchInst::swapSuccessors() { swapProfMetadata(); } -BasicBlock *BranchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} - -unsigned BranchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // AllocaInst Implementation //===----------------------------------------------------------------------===// @@ -3785,19 +3683,6 @@ void SwitchInst::growOperands() { growHungoffUses(ReservedSpace); } - -BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} - -unsigned SwitchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // IndirectBrInst Implementation //===----------------------------------------------------------------------===// @@ -3877,18 +3762,6 @@ void IndirectBrInst::removeDestination(unsigned idx) { setNumHungOffUseOperands(NumOps-1); } -BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} - -unsigned IndirectBrInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // cloneImpl() implementations //===----------------------------------------------------------------------===// diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp index 94e115a6a78d..8b12c55937f5 100644 --- a/lib/IR/IntrinsicInst.cpp +++ b/lib/IR/IntrinsicInst.cpp @@ -21,8 +21,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" diff --git a/lib/IR/LLVMBuild.txt b/lib/IR/LLVMBuild.txt index cd90ef5b16b6..71368abfd874 100644 --- a/lib/IR/LLVMBuild.txt +++ b/lib/IR/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Core parent = Libraries -required_libraries = Support +required_libraries = BinaryFormat Support diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 6c6383c22255..ad0d4470c111 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/LLVMContext.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "LLVMContextImpl.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Metadata.h" diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 9db30da89ed0..4ba974409a4f 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -27,13 +27,13 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/YAMLTraits.h" #include diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index b2b12289f871..29e2f42d3e05 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -593,7 +593,7 @@ AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { assert(Node && "cached analysis usage must be non null"); AnUsageMap[P] = &Node->AU; - AnUsage = &Node->AU;; + AnUsage = &Node->AU; } return AnUsage; } diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 2411dc5ce7dc..0b1bc9a8c270 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Metadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" #include "SymbolTableListTraitsImpl.h" @@ -19,11 +20,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Argument.h" @@ -38,7 +39,6 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index 95673e515a55..f8853ed169c5 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Module.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" @@ -22,17 +23,16 @@ #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp index a03a6fb62237..f1c70058fac2 100644 --- a/lib/IR/OptBisect.cpp +++ b/lib/IR/OptBisect.cpp @@ -13,12 +13,12 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/IR/OptBisect.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/IR/Module.h" -#include "llvm/IR/OptBisect.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index c9f957c244f8..44fe5e48c720 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Type.h" #include "LLVMContextImpl.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/None.h" @@ -22,7 +23,6 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp index a178b9ec0f09..b39678a013fb 100644 --- a/lib/IR/TypeFinder.cpp +++ b/lib/IR/TypeFinder.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/TypeFinder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -20,7 +21,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/IR/TypeFinder.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp index 0c3946c8661e..ccdabe0817b4 100644 --- a/lib/IR/ValueSymbolTable.cpp +++ b/lib/IR/ValueSymbolTable.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index a8523236ac9f..5c1b3412840d 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -49,7 +49,6 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/ilist.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -59,6 +58,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -81,10 +82,10 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -102,7 +103,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt index 89ddd0fc1af3..9e586465025e 100644 --- a/lib/LLVMBuild.txt +++ b/lib/LLVMBuild.txt @@ -31,6 +31,7 @@ subdirectories = LTO MC Object + BinaryFormat ObjectYAML Option Passes diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 92145aaf667a..9d2a44045d6a 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -315,54 +315,19 @@ InputFile::~InputFile() = default; Expected> InputFile::create(MemoryBufferRef Object) { std::unique_ptr File(new InputFile); - ErrorOr BCOrErr = - IRObjectFile::findBitcodeInMemBuffer(Object); - if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); + Expected FOrErr = readIRSymtab(Object); + if (!FOrErr) + return FOrErr.takeError(); - Expected> BMsOrErr = - getBitcodeModuleList(*BCOrErr); - if (!BMsOrErr) - return BMsOrErr.takeError(); + File->TargetTriple = FOrErr->TheReader.getTargetTriple(); + File->SourceFileName = FOrErr->TheReader.getSourceFileName(); + File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts(); + File->ComdatTable = FOrErr->TheReader.getComdatTable(); - if (BMsOrErr->empty()) - return make_error("Bitcode file does not contain any modules", - inconvertibleErrorCode()); - - File->Mods = *BMsOrErr; - - LLVMContext Ctx; - std::vector Mods; - std::vector> OwnedMods; - for (auto BM : *BMsOrErr) { - Expected> MOrErr = - BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, - /*IsImporting*/ false); - if (!MOrErr) - return MOrErr.takeError(); - - if ((*MOrErr)->getDataLayoutStr().empty()) - return make_error("input module has no datalayout", - inconvertibleErrorCode()); - - Mods.push_back(MOrErr->get()); - OwnedMods.push_back(std::move(*MOrErr)); - } - - SmallVector Symtab; - if (Error E = irsymtab::build(Mods, Symtab, File->Strtab)) - return std::move(E); - - irsymtab::Reader R({Symtab.data(), Symtab.size()}, - {File->Strtab.data(), File->Strtab.size()}); - File->TargetTriple = R.getTargetTriple(); - File->SourceFileName = R.getSourceFileName(); - File->COFFLinkerOpts = R.getCOFFLinkerOpts(); - File->ComdatTable = R.getComdatTable(); - - for (unsigned I = 0; I != Mods.size(); ++I) { + for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) { size_t Begin = File->Symbols.size(); - for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I)) + for (const irsymtab::Reader::SymbolRef &Sym : + FOrErr->TheReader.module_symbols(I)) // Skip symbols that are irrelevant to LTO. Note that this condition needs // to match the one in Skip() in LTO::addRegularLTO(). if (Sym.isGlobal() && !Sym.isFormatSpecific()) @@ -370,6 +335,8 @@ Expected> InputFile::create(MemoryBufferRef Object) { File->ModuleSymIndices.push_back({Begin, File->Symbols.size()}); } + File->Mods = FOrErr->Mods; + File->Strtab = std::move(FOrErr->Strtab); return std::move(File); } @@ -405,10 +372,11 @@ void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, if (Res.Prevailing) GlobalRes.IRName = Sym.getIRName(); - // Set the partition to external if we know it is used elsewhere, e.g. - // it is visible to a regular object, is referenced from llvm.compiler_used, - // or was already recorded as being referenced from a different partition. - if (Res.VisibleToRegularObj || Sym.isUsed() || + // Set the partition to external if we know it is re-defined by the linker + // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a + // regular object, is referenced from llvm.compiler_used, or was already + // recorded as being referenced from a different partition. + if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() || (GlobalRes.Partition != GlobalResolution::Unknown && GlobalRes.Partition != Partition)) { GlobalRes.Partition = GlobalResolution::External; @@ -439,6 +407,8 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, OS << 'l'; if (Res.VisibleToRegularObj) OS << 'x'; + if (Res.LinkerRedefined) + OS << 'r'; OS << '\n'; } OS.flush(); @@ -543,6 +513,12 @@ Error LTO::addRegularLTO(BitcodeModule BM, if (Sym.isUndefined()) continue; Keep.push_back(GV); + // For symbols re-defined with linker -wrap and -defsym options, + // set the linkage to weak to inhibit IPO. The linkage will be + // restored by the linker. + if (Res.LinkerRedefined) + GV->setLinkage(GlobalValue::WeakAnyLinkage); + GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage(); if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) GV->setLinkage(GlobalValue::getWeakLinkage( diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index 6b221a347c17..e4094d44867b 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -24,8 +24,8 @@ #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" -#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index e86db933af3c..4d139132df46 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -13,11 +13,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -36,7 +37,6 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -1020,18 +1020,24 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, MCSectionELF &Section = static_cast(Sec); StringRef SectionName = Section.getSectionName(); + auto &MC = Asm.getContext(); + const auto &MAI = MC.getAsmInfo(); + // Compressing debug_frame requires handling alignment fragments which is // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow // for writing to arbitrary buffers) for little benefit. bool CompressionEnabled = - Asm.getContext().getAsmInfo()->compressDebugSections() != - DebugCompressionType::DCT_None; + MAI->compressDebugSections() != DebugCompressionType::None; if (!CompressionEnabled || !SectionName.startswith(".debug_") || SectionName == ".debug_frame") { Asm.writeSectionData(&Section, Layout); return; } + assert((MAI->compressDebugSections() == DebugCompressionType::Z || + MAI->compressDebugSections() == DebugCompressionType::GNU) && + "expected zlib or zlib-gnu style compression"); + SmallVector UncompressedData; raw_svector_ostream VecOS(UncompressedData); raw_pwrite_stream &OldStream = getStream(); @@ -1048,8 +1054,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, return; } - bool ZlibStyle = Asm.getContext().getAsmInfo()->compressDebugSections() == - DebugCompressionType::DCT_Zlib; + bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, ZlibStyle, Sec.getAlignment())) { getStream() << UncompressedData; @@ -1061,8 +1066,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); else // Add "z" prefix to section name. This is zlib-gnu style. - Asm.getContext().renameELFSection(&Section, - (".z" + SectionName.drop_front(1)).str()); + MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); getStream() << CompressedContents; } diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index fc0aa788f6d3..3642f37aa855 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixupKindInfo.h" #include #include diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index b9be685cedc4..f05904048e0b 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 4b2001764e97..c74840982fb7 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoDarwin.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/Support/MachO.h" using namespace llvm; diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp index e44c08b50d76..b0dc43c6c868 100644 --- a/lib/MC/MCAsmInfoELF.cpp +++ b/lib/MC/MCAsmInfoELF.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" using namespace llvm; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index c2bb7b277181..53cdaac3aa54 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCAssembler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -16,7 +17,6 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -37,9 +37,9 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include +#include #include #include diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp index 6c9a4f9f982d..92b1e12da552 100644 --- a/lib/MC/MCCodeView.cpp +++ b/lib/MC/MCCodeView.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCCodeView.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -20,7 +21,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/EndianStream.h" using namespace llvm; diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 4628d0ab88f3..48ee84edb096 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -7,14 +7,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCContext.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" @@ -32,14 +34,12 @@ #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index aa5072743bdf..ef1d8335e1bd 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -27,8 +27,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp index 5805fd7007d2..8f932a3f0d48 100644 --- a/lib/MC/MCDisassembler/MCRelocationInfo.cpp +++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm-c/Disassembler.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 1a320b0165fa..a2beee32f2cb 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -7,19 +7,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCDwarf.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Config/config.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" @@ -28,7 +29,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index c8e0223c0573..50c1f6e79f8a 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCELFStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFragment.h" @@ -27,10 +28,9 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 8149aa27327c..38a8af49c194 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCExpr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" @@ -655,8 +655,12 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, // the OS X assembler will completely drop the 4. We should probably // include it in the relocation or produce an error if that is not // possible. + // Allow constant expressions. if (!A && !B) return true; + // Allows aliases with zero offset. + if (Res.getConstant() == 0 && (!A || !B)) + return true; } } diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp index 90b44177cf5e..f3d0eb55eecd 100644 --- a/lib/MC/MCFragment.cpp +++ b/lib/MC/MCFragment.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCFragment.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 912179095974..9296fcedb72b 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCInstPrinter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp index 566944c53548..280b5cf68c98 100644 --- a/lib/MC/MCInstrAnalysis.cpp +++ b/lib/MC/MCInstrAnalysis.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 1e9ef4163256..674c7b9bf619 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -32,8 +32,8 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index d156f5d05a31..4db9a2c8d8de 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index b685790910d0..21c5516785ef 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -10,6 +10,8 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" @@ -17,8 +19,6 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/ELF.h" using namespace llvm; @@ -241,6 +241,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { DwarfStrSection = Ctx->getMachOSection("__DWARF", "__debug_str", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "info_string"); + DwarfStrOffSection = + Ctx->getMachOSection("__DWARF", "__debug_str_offs", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "section_str_off"); DwarfLocSection = Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "section_debug_loc"); @@ -557,6 +560,11 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { DwarfAccelTypesSection = Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0); + // String Offset and Address Sections + DwarfStrOffSection = + Ctx->getELFSection(".debug_str_offsets", DebugSecType, 0); + DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0); + // Fission Sections DwarfInfoDWOSection = Ctx->getELFSection(".debug_info.dwo", DebugSecType, 0); @@ -573,7 +581,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { Ctx->getELFSection(".debug_loc.dwo", DebugSecType, 0); DwarfStrOffDWOSection = Ctx->getELFSection(".debug_str_offsets.dwo", DebugSecType, 0); - DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0); // DWP Sections DwarfCUIndexSection = @@ -695,6 +702,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata(), "info_string"); + DwarfStrOffSection = Ctx->getCOFFSection( + ".debug_str_offsets", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_str_off"); DwarfLocSection = Ctx->getCOFFSection( ".debug_loc", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -749,7 +761,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { ".debug_str_offsets.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); + SectionKind::getMetadata(), "section_str_off_dwo"); DwarfAddrSection = Ctx->getCOFFSection( ".debug_addr", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 478b4e84e74a..98ac48a23f91 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 38dadfe62135..2b963607b837 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SaveAndRestore.h" diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 3b213ef4ce09..dad47e49e2c2 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -15,12 +15,13 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -47,7 +48,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" @@ -703,7 +703,7 @@ const AsmToken &AsmParser::Lex() { // if it's a end of statement with a comment in it if (getTok().is(AsmToken::EndOfStatement)) { // if this is a line comment output it. - if (getTok().getString().front() != '\n' && + if (!getTok().getString().empty() && getTok().getString().front() != '\n' && getTok().getString().front() != '\r' && MAI.preserveAsmComments()) Out.addExplicitComment(Twine(getTok().getString())); } @@ -1523,7 +1523,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, Lex(); if (Lexer.is(AsmToken::EndOfStatement)) { // if this is a line comment we can drop it safely - if (getTok().getString().front() == '\r' || + if (getTok().getString().empty() || getTok().getString().front() == '\r' || getTok().getString().front() == '\n') Out.AddBlankLine(); Lex(); diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index bec62ccb2f7f..b83d68d4fe20 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/SMLoc.h" #include #include diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 73a7ad0500c3..f4152a9067a0 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -25,10 +26,9 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 401011a027f4..f1dfb91aafbb 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" #include diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp index 1d12ab858284..8f845ee1d76f 100644 --- a/lib/MC/MCParser/MCAsmLexer.cpp +++ b/lib/MC/MCParser/MCAsmLexer.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/SMLoc.h" using namespace llvm; diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index 27b37f3e2dfb..ea36b3b9b3b2 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/Support/Debug.h" diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp index 5f821443bb96..64ac82a6c66f 100644 --- a/lib/MC/MCParser/MCTargetAsmParser.cpp +++ b/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCContext.h" using namespace llvm; diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp index a75100a4876b..0f76c1838b51 100644 --- a/lib/MC/MCRegisterInfo.cpp +++ b/lib/MC/MCRegisterInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index 7986c0122043..b961cb3968e8 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSection.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index f0709cbc2515..72a7fc36a460 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index 78fe01cca24a..a75068ebf05a 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSectionELF.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index c9a6f12b6a58..2bfb9a63eedb 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -7,9 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" @@ -21,19 +23,17 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCWinEH.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include +#include #include using namespace llvm; diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 777b4e3d6b67..385cdcc62320 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index cb262542b89f..9abaaef2fe84 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSymbol.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp index ffa8260d4342..67449eb6dcf9 100644 --- a/lib/MC/MCSymbolELF.cpp +++ b/lib/MC/MCSymbolELF.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/Support/ELF.h" namespace llvm { diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 5d666b67fddb..b85e53db5d61 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCTargetOptions.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp index a09a17d7a124..301f30d4f6ec 100644 --- a/lib/MC/MCWasmObjectTargetWriter.cpp +++ b/lib/MC/MCWasmObjectTargetWriter.cpp @@ -17,11 +17,5 @@ using namespace llvm; MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit_) : Is64Bit(Is64Bit_) {} -bool MCWasmObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym, - unsigned Type) const { - return false; -} - -void MCWasmObjectTargetWriter::sortRelocs( - const MCAssembler &Asm, std::vector &Relocs) { -} +// Pin the vtable to this object file +MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default; diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp index 21a913999f64..a5d0f5a2cb75 100644 --- a/lib/MC/MCWinEH.cpp +++ b/lib/MC/MCWinEH.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCWinEH.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCWinEH.h" -#include "llvm/Support/COFF.h" namespace llvm { namespace WinEH { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index d9ccf0dd661f..c4e7cdbe095e 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -8,8 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -27,7 +28,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index a0fb33846fcf..6025a20a9c19 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/StringTableBuilder.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/StringTableBuilder.h" -#include "llvm/Support/COFF.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 51aaa4b0aa25..b68e88ca5725 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/SubtargetFeature.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 9b2031f05043..4b3dc6e0c211 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" @@ -31,7 +32,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/StringSaver.h" -#include "llvm/Support/Wasm.h" #include using namespace llvm; @@ -127,6 +127,38 @@ struct WasmGlobal { uint32_t ImportIndex; }; +// Information about a single relocation. +struct WasmRelocationEntry { + uint64_t Offset; // Where is the relocation. + const MCSymbolWasm *Symbol; // The symbol to relocate with. + int64_t Addend; // A value to add to the symbol. + unsigned Type; // The type of the relocation. + MCSectionWasm *FixupSection;// The section the relocation is targeting. + + WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, + int64_t Addend, unsigned Type, + MCSectionWasm *FixupSection) + : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), + FixupSection(FixupSection) {} + + bool hasAddend() const { + switch (Type) { + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + return true; + default: + return false; + } + } + + void print(raw_ostream &Out) const { + Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend + << ", Type=" << Type << ", FixupSection=" << FixupSection; + } + void dump() const { print(errs()); } +}; + class WasmObjectWriter : public MCObjectWriter { /// Helper struct for containing some precomputed information on symbols. struct WasmSymbolData { @@ -146,11 +178,14 @@ class WasmObjectWriter : public MCObjectWriter { // Relocations for fixing up references in the data section. std::vector DataRelocations; - // Fixups for call_indirect type indices. - std::vector TypeIndexFixups; - // Index values to use for fixing up call_indirect type indices. - std::vector TypeIndexFixupTypes; + // Maps function symbols to the index of the type of the function + DenseMap TypeIndices; + + DenseMap SymbolIndices; + + DenseMap + FunctionTypeIndices; // TargetObjectWriter wrappers. bool is64Bit() const { return TargetObjectWriter->is64Bit(); } @@ -170,6 +205,15 @@ public: private: ~WasmObjectWriter() override; + void reset() override { + CodeRelocations.clear(); + DataRelocations.clear(); + TypeIndices.clear(); + SymbolIndices.clear(); + FunctionTypeIndices.clear(); + MCObjectWriter::reset(); + } + void writeHeader(const MCAssembler &Asm); void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, @@ -195,21 +239,23 @@ private: void writeExportSection(const SmallVector &Exports); void writeElemSection(const SmallVector &TableElems); void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, - DenseMap &SymbolIndices, const SmallVector &Functions); uint64_t - writeDataSection(const SmallVector &DataBytes, - DenseMap &SymbolIndices); + writeDataSection(const SmallVector &DataBytes); void writeNameSection(const SmallVector &Functions, const SmallVector &Imports, uint32_t NumFuncImports); - void writeCodeRelocSection( - DenseMap &SymbolIndices); - void writeDataRelocSection( - DenseMap &SymbolIndices, - uint64_t DataSectionHeaderSize); + void writeCodeRelocSection(); + void writeDataRelocSection(uint64_t DataSectionHeaderSize); void writeLinkingMetaDataSection(bool HasStackPointer, uint32_t StackPointerGlobal); + + void applyRelocations(ArrayRef Relocations, + uint64_t ContentsOffset); + + void writeRelocations(ArrayRef Relocations, + uint64_t HeaderSize); + uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); }; } // end anonymous namespace @@ -356,19 +402,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, SymA->setUsedInReloc(); } - if (RefA) { - if (RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX) { - assert(C == 0); - WasmRelocationEntry Rec(FixupOffset, SymA, C, - wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB, - &FixupSection); - TypeIndexFixups.push_back(Rec); - return; - } - } - unsigned Type = getRelocType(Ctx, Target, Fixup, IsPCRel); - WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); if (FixupSection.hasInstructions()) @@ -427,124 +461,85 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) { return Value; } +uint32_t WasmObjectWriter::getRelocationIndexValue( + const WasmRelocationEntry &RelEntry) { + switch (RelEntry.Type) { + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + assert(SymbolIndices.count(RelEntry.Symbol)); + return SymbolIndices[RelEntry.Symbol]; + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + assert(TypeIndices.count(RelEntry.Symbol)); + return TypeIndices[RelEntry.Symbol]; + default: + llvm_unreachable("invalid relocation type"); + } +} + // Apply the portions of the relocation records that we can handle ourselves // directly. -static void ApplyRelocations( - ArrayRef Relocations, - raw_pwrite_stream &Stream, - DenseMap &SymbolIndices, - uint64_t ContentsOffset) -{ +void WasmObjectWriter::applyRelocations( + ArrayRef Relocations, uint64_t ContentsOffset) { + raw_pwrite_stream &Stream = getStream(); for (const WasmRelocationEntry &RelEntry : Relocations) { uint64_t Offset = ContentsOffset + RelEntry.FixupSection->getSectionOffset() + RelEntry.Offset; - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: { - assert(SymbolIndices.count(RelEntry.Symbol)); - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - WritePatchableLEB(Stream, Index, Offset); + switch (RelEntry.Type) { + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: { + uint32_t Index = getRelocationIndexValue(RelEntry); + WritePatchableSLEB(Stream, Index, Offset); break; } - case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: { - assert(SymbolIndices.count(RelEntry.Symbol)); - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - - WritePatchableSLEB(Stream, Index, Offset); + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { + uint32_t Index = getRelocationIndexValue(RelEntry); + WriteI32(Stream, Index, Offset); break; } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: { uint32_t Value = ProvisionalValue(RelEntry); - WritePatchableSLEB(Stream, Value, Offset); break; } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: { uint32_t Value = ProvisionalValue(RelEntry); - WritePatchableLEB(Stream, Value, Offset); break; } - case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { - assert(SymbolIndices.count(RelEntry.Symbol)); - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - - WriteI32(Stream, Index, Offset); - break; - } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: { uint32_t Value = ProvisionalValue(RelEntry); - WriteI32(Stream, Value, Offset); break; } - default: - break; - } - } -} - -// Write out the portions of the relocation records that the linker will -// need to handle. -static void -WriteRelocations(ArrayRef Relocations, - raw_pwrite_stream &Stream, - DenseMap &SymbolIndices, - uint64_t HeaderSize) { - for (const WasmRelocationEntry RelEntry : Relocations) { - encodeULEB128(RelEntry.Type, Stream); - - uint64_t Offset = RelEntry.Offset + - RelEntry.FixupSection->getSectionOffset() + HeaderSize; - assert(SymbolIndices.count(RelEntry.Symbol)); - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - int64_t Addend = RelEntry.Addend; - - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: - case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: - case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: - encodeULEB128(Offset, Stream); - encodeULEB128(Index, Stream); - assert(Addend == 0 && "addends not supported for functions"); - break; - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: - encodeULEB128(Offset, Stream); - encodeULEB128(Index, Stream); - encodeSLEB128(Addend, Stream); - break; default: llvm_unreachable("unsupported relocation type"); } } } -// Write out the the type relocation records that the linker will +// Write out the portions of the relocation records that the linker will // need to handle. -static void WriteTypeRelocations( - ArrayRef TypeIndexFixups, - ArrayRef TypeIndexFixupTypes, - raw_pwrite_stream &Stream) -{ - for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { - const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; - uint32_t Type = TypeIndexFixupTypes[i]; +void WasmObjectWriter::writeRelocations( + ArrayRef Relocations, uint64_t HeaderSize) { + raw_pwrite_stream &Stream = getStream(); + for (const WasmRelocationEntry& RelEntry : Relocations) { - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - assert(Fixup.Addend == 0); + uint64_t Offset = RelEntry.Offset + + RelEntry.FixupSection->getSectionOffset() + HeaderSize; + uint32_t Index = getRelocationIndexValue(RelEntry); - uint64_t Offset = Fixup.Offset + - Fixup.FixupSection->getSectionOffset(); - - encodeULEB128(Fixup.Type, Stream); + encodeULEB128(RelEntry.Type, Stream); encodeULEB128(Offset, Stream); - encodeULEB128(Type, Stream); + encodeULEB128(Index, Stream); + if (RelEntry.hasAddend()) + encodeSLEB128(RelEntry.Addend, Stream); } } @@ -733,7 +728,6 @@ void WasmObjectWriter::writeElemSection( void WasmObjectWriter::writeCodeSection( const MCAssembler &Asm, const MCAsmLayout &Layout, - DenseMap &SymbolIndices, const SmallVector &Functions) { if (Functions.empty()) return; @@ -768,34 +762,14 @@ void WasmObjectWriter::writeCodeSection( Asm.writeSectionData(&FuncSection, Layout); } - // Apply the type index fixups for call_indirect etc. instructions. - for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { - uint32_t Type = TypeIndexFixupTypes[i]; - unsigned Padding = PaddingFor5ByteULEB128(Type); - - const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; - assert(Fixup.Addend == 0); - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - uint64_t Offset = Fixup.Offset + - Fixup.FixupSection->getSectionOffset(); - - uint8_t Buffer[16]; - unsigned SizeLen = encodeULEB128(Type, Buffer, Padding); - assert(SizeLen == 5); - getStream().pwrite((char *)Buffer, SizeLen, - Section.ContentsOffset + Offset); - } - // Apply fixups. - ApplyRelocations(CodeRelocations, getStream(), SymbolIndices, - Section.ContentsOffset); + applyRelocations(CodeRelocations, Section.ContentsOffset); endSection(Section); } uint64_t WasmObjectWriter::writeDataSection( - const SmallVector &DataBytes, - DenseMap &SymbolIndices) { + const SmallVector &DataBytes) { if (DataBytes.empty()) return 0; @@ -812,8 +786,7 @@ uint64_t WasmObjectWriter::writeDataSection( writeBytes(DataBytes); // data // Apply fixups. - ApplyRelocations(DataRelocations, getStream(), SymbolIndices, - Section.ContentsOffset + HeaderSize); + applyRelocations(DataRelocations, Section.ContentsOffset + HeaderSize); endSection(Section); return HeaderSize; @@ -853,8 +826,7 @@ void WasmObjectWriter::writeNameSection( endSection(Section); } -void WasmObjectWriter::writeCodeRelocSection( - DenseMap &SymbolIndices) { +void WasmObjectWriter::writeCodeRelocSection() { // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md // for descriptions of the reloc sections. @@ -865,17 +837,14 @@ void WasmObjectWriter::writeCodeRelocSection( startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE"); encodeULEB128(wasm::WASM_SEC_CODE, getStream()); - encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream()); + encodeULEB128(CodeRelocations.size(), getStream()); - WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0); - WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); + writeRelocations(CodeRelocations, 0); endSection(Section); } -void WasmObjectWriter::writeDataRelocSection( - DenseMap &SymbolIndices, - uint64_t DataSectionHeaderSize) { +void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) { // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md // for descriptions of the reloc sections. @@ -888,8 +857,7 @@ void WasmObjectWriter::writeDataRelocSection( encodeULEB128(wasm::WASM_SEC_DATA, getStream()); encodeULEB128(DataRelocations.size(), getStream()); - WriteRelocations(DataRelocations, getStream(), SymbolIndices, - DataSectionHeaderSize); + writeRelocations(DataRelocations, DataSectionHeaderSize); endSection(Section); } @@ -915,15 +883,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32; // Collect information from the available symbols. - DenseMap - FunctionTypeIndices; SmallVector FunctionTypes; SmallVector Functions; SmallVector TableElems; SmallVector Globals; SmallVector Imports; SmallVector Exports; - DenseMap SymbolIndices; SmallPtrSet IsAddressTaken; unsigned NumFuncImports = 0; unsigned NumGlobalImports = 0; @@ -1194,9 +1159,9 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } // Add types for indirect function calls. - for (const WasmRelocationEntry &Fixup : TypeIndexFixups) { - assert(Fixup.Addend == 0); - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); + for (const WasmRelocationEntry &Fixup : CodeRelocations) { + if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) + continue; WasmFunctionType F; F.Returns = Fixup.Symbol->getReturns(); @@ -1206,7 +1171,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (Pair.second) FunctionTypes.push_back(F); - TypeIndexFixupTypes.push_back(Pair.first->second); + TypeIndices[Fixup.Symbol] = Pair.first->second; } // Write out the Wasm header. @@ -1221,11 +1186,11 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, writeExportSection(Exports); // TODO: Start Section writeElemSection(TableElems); - writeCodeSection(Asm, Layout, SymbolIndices, Functions); - uint64_t DataSectionHeaderSize = writeDataSection(DataBytes, SymbolIndices); + writeCodeSection(Asm, Layout, Functions); + uint64_t DataSectionHeaderSize = writeDataSection(DataBytes); writeNameSection(Functions, Imports, NumFuncImports); - writeCodeRelocSection(SymbolIndices); - writeDataRelocSection(SymbolIndices, DataSectionHeaderSize); + writeCodeRelocSection(); + writeDataRelocSection(DataSectionHeaderSize); writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal); // TODO: Translate the .comment section to the output. diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index e99a548ac001..53dee3e8b9f3 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -12,11 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -32,13 +33,12 @@ #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/JamCRC.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include +#include #include #include #include diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index c26d87f36f83..b4d0d7a87f1d 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -28,11 +29,10 @@ #include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index c4924f85a907..977cccc11dcd 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/Archive.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Support/Chrono.h" diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 5b233aab2018..e1c35ed6a6a0 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/ArchiveWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" @@ -290,7 +291,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef(); Expected> ObjOrErr = object::SymbolicFile::createSymbolicFile( - MemberBuffer, sys::fs::file_magic::unknown, &Context); + MemberBuffer, llvm::file_magic::unknown, &Context); if (!ObjOrErr) { // FIXME: check only for "not an object file" errors. consumeError(ObjOrErr.takeError()); diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp index 116af3c917be..c4565db459e6 100644 --- a/lib/Object/Binary.cpp +++ b/lib/Object/Binary.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" -#include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" @@ -43,41 +44,41 @@ MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; } Expected> object::createBinary(MemoryBufferRef Buffer, LLVMContext *Context) { - sys::fs::file_magic Type = sys::fs::identify_magic(Buffer.getBuffer()); + file_magic Type = identify_magic(Buffer.getBuffer()); switch (Type) { - case sys::fs::file_magic::archive: - return Archive::create(Buffer); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::wasm_object: - return ObjectFile::createSymbolicFile(Buffer, Type, Context); - case sys::fs::file_magic::macho_universal_binary: - return MachOUniversalBinary::create(Buffer); - case sys::fs::file_magic::windows_resource: - return WindowsResource::createWindowsResource(Buffer); - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::coff_cl_gl_object: - // Unrecognized object file format. - return errorCodeToError(object_error::invalid_file_type); + case file_magic::archive: + return Archive::create(Buffer); + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: + case file_magic::bitcode: + case file_magic::wasm_object: + return ObjectFile::createSymbolicFile(Buffer, Type, Context); + case file_magic::macho_universal_binary: + return MachOUniversalBinary::create(Buffer); + case file_magic::windows_resource: + return WindowsResource::createWindowsResource(Buffer); + case file_magic::unknown: + case file_magic::coff_cl_gl_object: + // Unrecognized object file format. + return errorCodeToError(object_error::invalid_file_type); } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index 37962d84d855..740bf94d40e0 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -285,11 +285,13 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { IMAGE_SYM_CLASS_EXTERNAL, 0}, }; - reinterpret_cast(SymbolTable[0].Name).Offset = + // TODO: Name.Offset.Offset here and in the all similar places below + // suggests a names refactoring. Maybe StringTableOffset.Value? + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); - reinterpret_cast(SymbolTable[5].Name).Offset = + SymbolTable[5].Name.Offset.Offset = sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; - reinterpret_cast(SymbolTable[6].Name).Offset = + SymbolTable[6].Name.Offset.Offset = sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + NullImportDescriptorSymbolName.length() + 1; append(Buffer, SymbolTable); @@ -354,8 +356,7 @@ ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { IMAGE_SYM_CLASS_EXTERNAL, 0}, }; - reinterpret_cast(SymbolTable[0].Name).Offset = - sizeof(uint32_t); + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); append(Buffer, SymbolTable); // String Table @@ -437,8 +438,7 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { IMAGE_SYM_CLASS_EXTERNAL, 0}, }; - reinterpret_cast(SymbolTable[0].Name).Offset = - sizeof(uint32_t); + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); append(Buffer, SymbolTable); // String Table diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 7372f24cb9a8..579c8dde366a 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -15,12 +15,12 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp index 89d199a3f3f6..53f084d7620e 100644 --- a/lib/Object/Decompressor.cpp +++ b/lib/Object/Decompressor.cpp @@ -8,11 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/Decompressor.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/ELF.h" using namespace llvm; using namespace llvm::support::endian; diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 5798a3540f53..9bc28dc14a29 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELF.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" using namespace llvm; using namespace object; @@ -24,7 +24,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, switch (Machine) { case ELF::EM_X86_64: switch (Type) { -#include "llvm/Support/ELFRelocs/x86_64.def" +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" default: break; } @@ -32,77 +32,77 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, case ELF::EM_386: case ELF::EM_IAMCU: switch (Type) { -#include "llvm/Support/ELFRelocs/i386.def" +#include "llvm/BinaryFormat/ELFRelocs/i386.def" default: break; } break; case ELF::EM_MIPS: switch (Type) { -#include "llvm/Support/ELFRelocs/Mips.def" +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" default: break; } break; case ELF::EM_AARCH64: switch (Type) { -#include "llvm/Support/ELFRelocs/AArch64.def" +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" default: break; } break; case ELF::EM_ARM: switch (Type) { -#include "llvm/Support/ELFRelocs/ARM.def" +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" default: break; } break; case ELF::EM_AVR: switch (Type) { -#include "llvm/Support/ELFRelocs/AVR.def" +#include "llvm/BinaryFormat/ELFRelocs/AVR.def" default: break; } break; case ELF::EM_HEXAGON: switch (Type) { -#include "llvm/Support/ELFRelocs/Hexagon.def" +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" default: break; } break; case ELF::EM_LANAI: switch (Type) { -#include "llvm/Support/ELFRelocs/Lanai.def" +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" default: break; } break; case ELF::EM_PPC: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" default: break; } break; case ELF::EM_PPC64: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC64.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" default: break; } break; case ELF::EM_RISCV: switch (Type) { -#include "llvm/Support/ELFRelocs/RISCV.def" +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" default: break; } break; case ELF::EM_S390: switch (Type) { -#include "llvm/Support/ELFRelocs/SystemZ.def" +#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def" default: break; } @@ -111,27 +111,27 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, case ELF::EM_SPARC32PLUS: case ELF::EM_SPARCV9: switch (Type) { -#include "llvm/Support/ELFRelocs/Sparc.def" +#include "llvm/BinaryFormat/ELFRelocs/Sparc.def" default: break; } break; case ELF::EM_WEBASSEMBLY: switch (Type) { -#include "llvm/Support/ELFRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/ELFRelocs/WebAssembly.def" default: break; } break; case ELF::EM_AMDGPU: switch (Type) { -#include "llvm/Support/ELFRelocs/AMDGPU.def" +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" default: break; } case ELF::EM_BPF: switch (Type) { -#include "llvm/Support/ELFRelocs/BPF.def" +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" default: break; } diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 86f033bb6cbf..fa136d782b5a 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/ELFObjectFile.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ELF.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" #include "llvm/Object/Error.h" -#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMAttributeParser.h" -#include "llvm/Support/ELF.h" +#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index adbf0de6d1bc..e7807b038335 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" @@ -95,13 +96,13 @@ ErrorOr IRObjectFile::findBitcodeInObject(const ObjectFile &Obj } ErrorOr IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { - sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + file_magic Type = identify_magic(Object.getBuffer()); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: return Object; - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected> ObjFile = ObjectFile::createObjectFile(Object, Type); if (!ObjFile) @@ -138,3 +139,25 @@ IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { return std::unique_ptr( new IRObjectFile(*BCOrErr, std::move(Mods))); } + +Expected object::readIRSymtab(MemoryBufferRef MBRef) { + IRSymtabFile F; + ErrorOr BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(MBRef); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected BFCOrErr = getBitcodeFileContents(*BCOrErr); + if (!BFCOrErr) + return BFCOrErr.takeError(); + + Expected FCOrErr = irsymtab::readBitcode(*BFCOrErr); + if (!FCOrErr) + return FCOrErr.takeError(); + + F.Mods = std::move(BFCOrErr->Mods); + F.Symtab = std::move(FCOrErr->Symtab); + F.Strtab = std::move(FCOrErr->Strtab); + F.TheReader = std::move(FCOrErr->TheReader); + return std::move(F); +} diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp index 5f0837882d60..d21acdb1d556 100644 --- a/lib/Object/IRSymtab.cpp +++ b/lib/Object/IRSymtab.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/IRSymtab.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -22,15 +23,16 @@ #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/MC/StringTableBuilder.h" -#include "llvm/Object/IRSymtab.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -88,6 +90,10 @@ struct Builder { }; Error Builder::addModule(Module *M) { + if (M->getDataLayoutStr().empty()) + return make_error("input module has no datalayout", + inconvertibleErrorCode()); + SmallPtrSet Used; collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); @@ -259,3 +265,40 @@ Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, SmallVector &Strtab) { return Builder(Symtab, Strtab).build(Mods); } + +// Upgrade a vector of bitcode modules created by an old version of LLVM by +// creating an irsymtab for them in the current format. +static Expected upgrade(ArrayRef BMs) { + FileContents FC; + + LLVMContext Ctx; + std::vector Mods; + std::vector> OwnedMods; + for (auto BM : BMs) { + Expected> MOrErr = + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); + } + + if (Error E = build(Mods, FC.Symtab, FC.Strtab)) + return std::move(E); + + FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, + {FC.Strtab.data(), FC.Strtab.size()}}; + return std::move(FC); +} + +Expected irsymtab::readBitcode(const BitcodeFileContents &BFC) { + if (BFC.Mods.empty()) + return make_error("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + // Right now we have no on-disk representation of symbol tables, so we always + // upgrade. + return upgrade(BFC.Mods); +} diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt index bae578c76f7e..687713bab6a2 100644 --- a/lib/Object/LLVMBuild.txt +++ b/lib/Object/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Object parent = Libraries -required_libraries = BitReader Core MC MCParser Support +required_libraries = BitReader Core MC BinaryFormat MCParser Support diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 084159a61f55..7804bbe06f83 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -14,13 +14,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -32,10 +33,9 @@ #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -4314,3 +4314,9 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, return make_error("Unrecognized MachO magic number", object_error::invalid_file_type); } + +StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch(Name) + .Case("debug_str_offs", "debug_str_offsets") + .Default(Name); +} diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp index a5b42725d817..f2e7a218c13a 100644 --- a/lib/Object/ModuleSymbolTable.cpp +++ b/lib/Object/ModuleSymbolTable.cpp @@ -13,9 +13,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/ModuleSymbolTable.h" #include "RecordStreamer.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -36,16 +37,15 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 6df481b060e1..1d2859cfbe9d 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm-c/Object.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Object/ObjectFile.h" using namespace llvm; diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 1f60e7157bd9..8377dd0d73fa 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/ObjectFile.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" -#include "llvm/Object/ObjectFile.h" #include "llvm/Object/Wasm.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -79,42 +80,42 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { } Expected> -ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) { +ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::bitcode: + case file_magic::coff_cl_gl_object: + case file_magic::archive: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: return errorOrToExpected(createELFObjectFile(Object)); - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: return createMachOObjectFile(Object); - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: return errorOrToExpected(createCOFFObjectFile(Object)); - case sys::fs::file_magic::wasm_object: + case file_magic::wasm_object: return createWasmObjectFile(Object); } llvm_unreachable("Unexpected Object File Type"); diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 16cff5c228bd..1042d29d2350 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/SymbolicFile.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -34,45 +35,46 @@ SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) SymbolicFile::~SymbolicFile() = default; -Expected> SymbolicFile::createSymbolicFile( - MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) { +Expected> +SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, + LLVMContext *Context) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: if (Context) return IRObjectFile::create(Object, *Context); LLVM_FALLTHROUGH; - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::archive: + case file_magic::coff_cl_gl_object: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::wasm_object: + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::pecoff_executable: + case file_magic::wasm_object: return ObjectFile::createObjectFile(Object, Type); - case sys::fs::file_magic::coff_import_library: + case file_magic::coff_import_library: return std::unique_ptr(new COFFImportFile(Object)); - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected> Obj = ObjectFile::createObjectFile(Object, Type); if (!Obj || !Context) diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index f565d7a33e55..2304098c1dc9 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" @@ -21,7 +22,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/Wasm.h" #include #include #include @@ -830,7 +830,7 @@ void WasmObjectFile::getRelocationTypeName( break; switch (Rel.Type) { -#include "llvm/Support/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" } #undef WASM_RELOC diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index e46d38e466a0..041659e7aa23 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -12,7 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/WindowsResource.h" -#include "llvm/Support/COFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MathExtras.h" +#include +#include #include #include @@ -29,6 +34,9 @@ static const size_t ResourceMagicSize = 16; static const size_t NullEntrySize = 16; +uint32_t WindowsResourceParser::TreeNode::StringCount = 0; +uint32_t WindowsResourceParser::TreeNode::DataCount = 0; + WindowsResource::WindowsResource(MemoryBufferRef Source) : Binary(Binary::ID_WinRes, Source) { size_t LeadingSize = ResourceMagicSize + NullEntrySize; @@ -115,7 +123,7 @@ Error ResourceEntryRef::loadNext() { return Error::success(); } -WindowsResourceParser::WindowsResourceParser() {} +WindowsResourceParser::WindowsResourceParser() : Root(false) {} Error WindowsResourceParser::parse(WindowsResource *WR) { auto EntryOrErr = WR->getHeadEntry(); @@ -124,9 +132,16 @@ Error WindowsResourceParser::parse(WindowsResource *WR) { ResourceEntryRef Entry = EntryOrErr.get(); bool End = false; - while (!End) { + Data.push_back(Entry.getData()); + + if (Entry.checkTypeString()) + StringTable.push_back(Entry.getTypeString()); + + if (Entry.checkNameString()) + StringTable.push_back(Entry.getNameString()); + Root.addEntry(Entry); RETURN_IF_ERROR(Entry.moveNext(End)); @@ -146,8 +161,37 @@ void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry) { NameNode.addLanguageNode(Entry); } -WindowsResourceParser::TreeNode::TreeNode(ArrayRef NameRef) - : Name(NameRef) {} +WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) { + if (IsStringNode) + StringIndex = StringCount++; +} + +WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) + : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion), + Characteristics(Characteristics) { + if (IsDataNode) + DataIndex = DataCount++; +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createStringNode() { + return std::unique_ptr(new TreeNode(true)); +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createIDNode() { + return std::unique_ptr(new TreeNode(false)); +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) { + return std::unique_ptr( + new TreeNode(MajorVersion, MinorVersion, Characteristics)); +} WindowsResourceParser::TreeNode & WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry) { @@ -168,14 +212,18 @@ WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry) { WindowsResourceParser::TreeNode & WindowsResourceParser::TreeNode::addLanguageNode( const ResourceEntryRef &Entry) { - return addChild(Entry.getLanguage()); + return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics()); } -WindowsResourceParser::TreeNode & -WindowsResourceParser::TreeNode::addChild(uint32_t ID) { +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild( + uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics) { auto Child = IDChildren.find(ID); if (Child == IDChildren.end()) { - auto NewChild = llvm::make_unique(ID); + auto NewChild = + IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics) + : createIDNode(); WindowsResourceParser::TreeNode &Node = *NewChild; IDChildren.emplace(ID, std::move(NewChild)); return Node; @@ -199,7 +247,7 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef NameRef) { auto Child = StringChildren.find(NameString); if (Child == StringChildren.end()) { - auto NewChild = llvm::make_unique(NameRef); + auto NewChild = createStringNode(); WindowsResourceParser::TreeNode &Node = *NewChild; StringChildren.emplace(NameString, std::move(NewChild)); return Node; @@ -218,5 +266,455 @@ void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer, } } +// This function returns the size of the entire resource tree, including +// directory tables, directory entries, and data entries. It does not include +// the directory strings or the relocations of the .rsrc section. +uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { + uint32_t Size = (IDChildren.size() + StringChildren.size()) * + sizeof(llvm::object::coff_resource_dir_entry); + + // Reached a node pointing to a data entry. + if (IsDataNode) { + Size += sizeof(llvm::object::coff_resource_data_entry); + return Size; + } + + // If the node does not point to data, it must have a directory table pointing + // to other nodes. + Size += sizeof(llvm::object::coff_resource_dir_table); + + for (auto const &Child : StringChildren) { + Size += Child.second->getTreeSize(); + } + for (auto const &Child : IDChildren) { + Size += Child.second->getTreeSize(); + } + return Size; +} + +class WindowsResourceCOFFWriter { +public: + WindowsResourceCOFFWriter(StringRef OutputFile, Machine MachineType, + const WindowsResourceParser &Parser, Error &E); + + Error write(); + +private: + void performFileLayout(); + void performSectionOneLayout(); + void performSectionTwoLayout(); + void writeCOFFHeader(); + void writeFirstSectionHeader(); + void writeSecondSectionHeader(); + void writeFirstSection(); + void writeSecondSection(); + void writeSymbolTable(); + void writeStringTable(); + void writeDirectoryTree(); + void writeDirectoryStringTable(); + void writeFirstSectionRelocations(); + std::unique_ptr Buffer; + uint8_t *Current; + Machine MachineType; + const WindowsResourceParser::TreeNode &Resources; + const ArrayRef> Data; + uint64_t FileSize; + uint32_t SymbolTableOffset; + uint32_t SectionOneSize; + uint32_t SectionOneOffset; + uint32_t SectionOneRelocations; + uint32_t SectionTwoSize; + uint32_t SectionTwoOffset; + const ArrayRef> StringTable; + std::vector StringTableOffsets; + std::vector DataOffsets; + std::vector RelocationAddresses; +}; + +WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( + StringRef OutputFile, Machine MachineType, + const WindowsResourceParser &Parser, Error &E) + : MachineType(MachineType), Resources(Parser.getTree()), + Data(Parser.getData()), StringTable(Parser.getStringTable()) { + performFileLayout(); + + ErrorOr> BufferOrErr = + FileOutputBuffer::create(OutputFile, FileSize); + if (!BufferOrErr) { + E = errorCodeToError(BufferOrErr.getError()); + return; + } + + Buffer = std::move(*BufferOrErr); +} + +void WindowsResourceCOFFWriter::performFileLayout() { + // Add size of COFF header. + FileSize = llvm::COFF::Header16Size; + + // one .rsrc section header for directory tree, another for resource data. + FileSize += 2 * llvm::COFF::SectionSize; + + performSectionOneLayout(); + performSectionTwoLayout(); + + // We have reached the address of the symbol table. + SymbolTableOffset = FileSize; + + FileSize += llvm::COFF::Symbol16Size; // size of the @feat.00 symbol. + FileSize += 4 * llvm::COFF::Symbol16Size; // symbol + aux for each section. + FileSize += Data.size() * llvm::COFF::Symbol16Size; // 1 symbol per resource. + FileSize += 4; // four null bytes for the string table. +} + +void WindowsResourceCOFFWriter::performSectionOneLayout() { + SectionOneOffset = FileSize; + + SectionOneSize = Resources.getTreeSize(); + uint32_t CurrentStringOffset = SectionOneSize; + uint32_t TotalStringTableSize = 0; + for (auto const &String : StringTable) { + StringTableOffsets.push_back(CurrentStringOffset); + uint32_t StringSize = String.size() * sizeof(UTF16) + sizeof(uint16_t); + CurrentStringOffset += StringSize; + TotalStringTableSize += StringSize; + } + SectionOneSize += alignTo(TotalStringTableSize, sizeof(uint32_t)); + + // account for the relocations of section one. + SectionOneRelocations = FileSize + SectionOneSize; + FileSize += SectionOneSize; + FileSize += Data.size() * + llvm::COFF::RelocationSize; // one relocation for each resource. +} + +void WindowsResourceCOFFWriter::performSectionTwoLayout() { + // add size of .rsrc$2 section, which contains all resource data on 8-byte + // alignment. + SectionTwoOffset = FileSize; + SectionTwoSize = 0; + for (auto const &Entry : Data) { + DataOffsets.push_back(SectionTwoSize); + SectionTwoSize += llvm::alignTo(Entry.size(), sizeof(uint64_t)); + } + FileSize += SectionTwoSize; +} + +static std::time_t getTime() { + std::time_t Now = time(nullptr); + if (Now < 0 || !isUInt<32>(Now)) + return UINT32_MAX; + return Now; +} + +Error WindowsResourceCOFFWriter::write() { + Current = Buffer->getBufferStart(); + + writeCOFFHeader(); + writeFirstSectionHeader(); + writeSecondSectionHeader(); + writeFirstSection(); + writeSecondSection(); + writeSymbolTable(); + writeStringTable(); + + if (auto EC = Buffer->commit()) { + return errorCodeToError(EC); + } + + return Error::success(); +} + +void WindowsResourceCOFFWriter::writeCOFFHeader() { + // Write the COFF header. + auto *Header = reinterpret_cast(Current); + switch (MachineType) { + case Machine::ARM: + Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT; + break; + case Machine::X64: + Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_AMD64; + break; + case Machine::X86: + Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_I386; + break; + default: + Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN; + } + Header->NumberOfSections = 2; + Header->TimeDateStamp = getTime(); + Header->PointerToSymbolTable = SymbolTableOffset; + // One symbol for every resource plus 2 for each section and @feat.00 + Header->NumberOfSymbols = Data.size() + 5; + Header->SizeOfOptionalHeader = 0; + Header->Characteristics = llvm::COFF::IMAGE_FILE_32BIT_MACHINE; +} + +void WindowsResourceCOFFWriter::writeFirstSectionHeader() { + // Write the first section header. + Current += sizeof(llvm::object::coff_file_header); + auto *SectionOneHeader = + reinterpret_cast(Current); + strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)llvm::COFF::NameSize); + SectionOneHeader->VirtualSize = 0; + SectionOneHeader->VirtualAddress = 0; + SectionOneHeader->SizeOfRawData = SectionOneSize; + SectionOneHeader->PointerToRawData = SectionOneOffset; + SectionOneHeader->PointerToRelocations = SectionOneRelocations; + SectionOneHeader->PointerToLinenumbers = 0; + SectionOneHeader->NumberOfRelocations = Data.size(); + SectionOneHeader->NumberOfLinenumbers = 0; + SectionOneHeader->Characteristics = llvm::COFF::IMAGE_SCN_ALIGN_1BYTES; + SectionOneHeader->Characteristics += + llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE; + SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeSecondSectionHeader() { + // Write the second section header. + Current += sizeof(llvm::object::coff_section); + auto *SectionTwoHeader = + reinterpret_cast(Current); + strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)llvm::COFF::NameSize); + SectionTwoHeader->VirtualSize = 0; + SectionTwoHeader->VirtualAddress = 0; + SectionTwoHeader->SizeOfRawData = SectionTwoSize; + SectionTwoHeader->PointerToRawData = SectionTwoOffset; + SectionTwoHeader->PointerToRelocations = 0; + SectionTwoHeader->PointerToLinenumbers = 0; + SectionTwoHeader->NumberOfRelocations = 0; + SectionTwoHeader->NumberOfLinenumbers = 0; + SectionTwoHeader->Characteristics = + llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionTwoHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeFirstSection() { + // Write section one. + Current += sizeof(llvm::object::coff_section); + + writeDirectoryTree(); + writeDirectoryStringTable(); + writeFirstSectionRelocations(); +} + +void WindowsResourceCOFFWriter::writeSecondSection() { + // Now write the .rsrc$02 section. + for (auto const &RawDataEntry : Data) { + std::copy(RawDataEntry.begin(), RawDataEntry.end(), Current); + Current += alignTo(RawDataEntry.size(), sizeof(uint64_t)); + } +} + +void WindowsResourceCOFFWriter::writeSymbolTable() { + // Now write the symbol table. + // First, the feat symbol. + auto *Symbol = reinterpret_cast(Current); + strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)llvm::COFF::NameSize); + Symbol->Value = 0x11; + Symbol->SectionNumber = 0xffff; + Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + Current += sizeof(llvm::object::coff_symbol16); + + // Now write the .rsrc1 symbol + aux. + Symbol = reinterpret_cast(Current); + strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)llvm::COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 1; + Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + Current += sizeof(llvm::object::coff_symbol16); + auto *Aux = + reinterpret_cast(Current); + Aux->Length = SectionOneSize; + Aux->NumberOfRelocations = Data.size(); + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + Current += sizeof(llvm::object::coff_aux_section_definition); + + // Now write the .rsrc2 symbol + aux. + Symbol = reinterpret_cast(Current); + strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)llvm::COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 2; + Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + Current += sizeof(llvm::object::coff_symbol16); + Aux = reinterpret_cast(Current); + Aux->Length = SectionTwoSize; + Aux->NumberOfRelocations = 0; + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + Current += sizeof(llvm::object::coff_aux_section_definition); + + // Now write a symbol for each relocation. + for (unsigned i = 0; i < Data.size(); i++) { + char RelocationName[9]; + sprintf(RelocationName, "$R%06X", DataOffsets[i]); + Symbol = reinterpret_cast(Current); + strncpy(Symbol->Name.ShortName, RelocationName, + (size_t)llvm::COFF::NameSize); + Symbol->Value = DataOffsets[i]; + Symbol->SectionNumber = 1; + Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + Current += sizeof(llvm::object::coff_symbol16); + } +} + +void WindowsResourceCOFFWriter::writeStringTable() { + // Just 4 null bytes for the string table. + auto COFFStringTable = reinterpret_cast(Current); + *COFFStringTable = 0; +} + +void WindowsResourceCOFFWriter::writeDirectoryTree() { + // Traverse parsed resource tree breadth-first and write the corresponding + // COFF objects. + std::queue Queue; + Queue.push(&Resources); + uint32_t NextLevelOffset = sizeof(llvm::object::coff_resource_dir_table) + + (Resources.getStringChildren().size() + + Resources.getIDChildren().size()) * + sizeof(llvm::object::coff_resource_dir_entry); + std::vector DataEntriesTreeOrder; + uint32_t CurrentRelativeOffset = 0; + + while (!Queue.empty()) { + auto CurrentNode = Queue.front(); + Queue.pop(); + auto *Table = + reinterpret_cast(Current); + Table->Characteristics = CurrentNode->getCharacteristics(); + Table->TimeDateStamp = 0; + Table->MajorVersion = CurrentNode->getMajorVersion(); + Table->MinorVersion = CurrentNode->getMinorVersion(); + auto &IDChildren = CurrentNode->getIDChildren(); + auto &StringChildren = CurrentNode->getStringChildren(); + Table->NumberOfNameEntries = StringChildren.size(); + Table->NumberOfIDEntries = IDChildren.size(); + Current += sizeof(llvm::object::coff_resource_dir_table); + CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_table); + + // Write the directory entries immediately following each directory table. + for (auto const &Child : StringChildren) { + auto *Entry = + reinterpret_cast(Current); + Entry->Identifier.NameOffset = + StringTableOffsets[Child.second->getStringIndex()]; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(llvm::object::coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + Current += sizeof(llvm::object::coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry); + } + for (auto const &Child : IDChildren) { + auto *Entry = + reinterpret_cast(Current); + Entry->Identifier.ID = Child.first; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(llvm::object::coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + Current += sizeof(llvm::object::coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry); + } + } + + RelocationAddresses.resize(Data.size()); + // Now write all the resource data entries. + for (auto DataNodes : DataEntriesTreeOrder) { + auto *Entry = + reinterpret_cast(Current); + RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; + Entry->DataRVA = 0; // Set to zero because it is a relocation. + Entry->DataSize = Data[DataNodes->getDataIndex()].size(); + Entry->Codepage = 0; + Entry->Reserved = 0; + Current += sizeof(llvm::object::coff_resource_data_entry); + CurrentRelativeOffset += sizeof(llvm::object::coff_resource_data_entry); + } +} + +void WindowsResourceCOFFWriter::writeDirectoryStringTable() { + // Now write the directory string table for .rsrc$01 + uint32_t TotalStringTableSize = 0; + for (auto String : StringTable) { + auto *LengthField = reinterpret_cast(Current); + uint16_t Length = String.size(); + *LengthField = Length; + Current += sizeof(uint16_t); + auto *Start = reinterpret_cast(Current); + std::copy(String.begin(), String.end(), Start); + Current += Length * sizeof(UTF16); + TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t); + } + Current += + alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize; +} + +void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { + + // Now write the relocations for .rsrc$01 + // Five symbols already in table before we start, @feat.00 and 2 for each + // .rsrc section. + uint32_t NextSymbolIndex = 5; + for (unsigned i = 0; i < Data.size(); i++) { + auto *Reloc = reinterpret_cast(Current); + Reloc->VirtualAddress = RelocationAddresses[i]; + Reloc->SymbolTableIndex = NextSymbolIndex++; + switch (MachineType) { + case Machine::ARM: + Reloc->Type = llvm::COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case Machine::X64: + Reloc->Type = llvm::COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case Machine::X86: + Reloc->Type = llvm::COFF::IMAGE_REL_I386_DIR32NB; + break; + default: + Reloc->Type = 0; + } + Current += sizeof(llvm::object::coff_relocation); + } +} + +Error writeWindowsResourceCOFF(StringRef OutputFile, Machine MachineType, + const WindowsResourceParser &Parser) { + Error E = Error::success(); + WindowsResourceCOFFWriter Writer(OutputFile, MachineType, Parser, E); + if (E) + return E; + return Writer.write(); +} + } // namespace object } // namespace llvm diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp index 21d29835624e..08a4bb715fac 100644 --- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp +++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -18,13 +18,20 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" #include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" - +#include "llvm/DebugInfo/CodeView/SymbolSerializer.h" +#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h" +#include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; using namespace llvm::codeview; using namespace llvm::CodeViewYAML; @@ -38,13 +45,21 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineBlock) LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeSite) LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(CrossModuleExport) +LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLCrossModuleImport) LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLFrameData) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false) LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind) LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind) LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags) +LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleExport) +LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLFrameData) +LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLCrossModuleImport) +LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleImportItem) LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineEntry) LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceColumnEntry) LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceFileChecksumEntry) @@ -61,7 +76,8 @@ struct YAMLSubsectionBase { virtual void map(IO &IO) = 0; virtual std::unique_ptr - toCodeViewSubsection(DebugStringTableSubsection *UseStrings, + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *UseStrings, DebugChecksumsSubsection *UseChecksums) const = 0; }; } @@ -75,7 +91,8 @@ struct YAMLChecksumsSubsection : public YAMLSubsectionBase { void map(IO &IO) override; std::unique_ptr - toCodeViewSubsection(DebugStringTableSubsection *Strings, + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, DebugChecksumsSubsection *Checksums) const override; static Expected> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, @@ -89,7 +106,8 @@ struct YAMLLinesSubsection : public YAMLSubsectionBase { void map(IO &IO) override; std::unique_ptr - toCodeViewSubsection(DebugStringTableSubsection *Strings, + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, DebugChecksumsSubsection *Checksums) const override; static Expected> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, @@ -105,7 +123,8 @@ struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase { void map(IO &IO) override; std::unique_ptr - toCodeViewSubsection(DebugStringTableSubsection *Strings, + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, DebugChecksumsSubsection *Checksums) const override; static Expected> fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, @@ -114,6 +133,97 @@ struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase { InlineeInfo InlineeLines; }; + +struct YAMLCrossModuleExportsSubsection : public YAMLSubsectionBase { + YAMLCrossModuleExportsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeExports) {} + + void map(IO &IO) override; + std::unique_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected> + fromCodeViewSubsection(const DebugCrossModuleExportsSubsectionRef &Exports); + + std::vector Exports; +}; + +struct YAMLCrossModuleImportsSubsection : public YAMLSubsectionBase { + YAMLCrossModuleImportsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeImports) {} + + void map(IO &IO) override; + std::unique_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugCrossModuleImportsSubsectionRef &Imports); + + std::vector Imports; +}; + +struct YAMLSymbolsSubsection : public YAMLSubsectionBase { + YAMLSymbolsSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Symbols) {} + + void map(IO &IO) override; + std::unique_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected> + fromCodeViewSubsection(const DebugSymbolsSubsectionRef &Symbols); + + std::vector Symbols; +}; + +struct YAMLStringTableSubsection : public YAMLSubsectionBase { + YAMLStringTableSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::StringTable) {} + + void map(IO &IO) override; + std::unique_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings); + + std::vector Strings; +}; + +struct YAMLFrameDataSubsection : public YAMLSubsectionBase { + YAMLFrameDataSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::FrameData) {} + + void map(IO &IO) override; + std::unique_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugFrameDataSubsectionRef &Frames); + + std::vector Frames; +}; + +struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase { + YAMLCoffSymbolRVASubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CoffSymbolRVA) {} + + void map(IO &IO) override; + std::unique_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const override; + static Expected> + fromCodeViewSubsection(const DebugSymbolRVASubsectionRef &RVAs); + + std::vector RVAs; +}; } void ScalarBitSetTraits::bitset(IO &io, LineFlags &Flags) { @@ -161,6 +271,17 @@ void MappingTraits::mapping(IO &IO, SourceLineBlock &Obj) { IO.mapRequired("Columns", Obj.Columns); } +void MappingTraits::mapping(IO &IO, CrossModuleExport &Obj) { + IO.mapRequired("LocalId", Obj.Local); + IO.mapRequired("GlobalId", Obj.Global); +} + +void MappingTraits::mapping(IO &IO, + YAMLCrossModuleImport &Obj) { + IO.mapRequired("Module", Obj.ModuleName); + IO.mapRequired("Imports", Obj.ImportIds); +} + void MappingTraits::mapping( IO &IO, SourceFileChecksumEntry &Obj) { IO.mapRequired("FileName", Obj.FileName); @@ -175,6 +296,17 @@ void MappingTraits::mapping(IO &IO, InlineeSite &Obj) { IO.mapOptional("ExtraFiles", Obj.ExtraFiles); } +void MappingTraits::mapping(IO &IO, YAMLFrameData &Obj) { + IO.mapRequired("CodeSize", Obj.CodeSize); + IO.mapRequired("FrameFunc", Obj.FrameFunc); + IO.mapRequired("LocalSize", Obj.LocalSize); + IO.mapOptional("MaxStackSize", Obj.MaxStackSize); + IO.mapOptional("ParamsSize", Obj.ParamsSize); + IO.mapOptional("PrologSize", Obj.PrologSize); + IO.mapOptional("RvaStart", Obj.RvaStart); + IO.mapOptional("SavedRegsSize", Obj.SavedRegsSize); +} + void YAMLChecksumsSubsection::map(IO &IO) { IO.mapTag("!FileChecksums", true); IO.mapRequired("Checksums", Checksums); @@ -196,6 +328,36 @@ void YAMLInlineeLinesSubsection::map(IO &IO) { IO.mapRequired("Sites", InlineeLines.Sites); } +void YAMLCrossModuleExportsSubsection::map(IO &IO) { + IO.mapTag("!CrossModuleExports", true); + IO.mapOptional("Exports", Exports); +} + +void YAMLCrossModuleImportsSubsection::map(IO &IO) { + IO.mapTag("!CrossModuleImports", true); + IO.mapOptional("Imports", Imports); +} + +void YAMLSymbolsSubsection::map(IO &IO) { + IO.mapTag("!Symbols", true); + IO.mapRequired("Records", Symbols); +} + +void YAMLStringTableSubsection::map(IO &IO) { + IO.mapTag("!StringTable", true); + IO.mapRequired("Strings", Strings); +} + +void YAMLFrameDataSubsection::map(IO &IO) { + IO.mapTag("!FrameData", true); + IO.mapRequired("Frames", Frames); +} + +void YAMLCoffSymbolRVASubsection::map(IO &IO) { + IO.mapTag("!COFFSymbolRVAs", true); + IO.mapRequired("RVAs", RVAs); +} + void MappingTraits::mapping( IO &IO, YAMLDebugSubsection &Subsection) { if (!IO.outputting()) { @@ -206,6 +368,20 @@ void MappingTraits::mapping( Subsection.Subsection = std::make_shared(); } else if (IO.mapTag("!InlineeLines")) { Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!CrossModuleExports")) { + Subsection.Subsection = + std::make_shared(); + } else if (IO.mapTag("!CrossModuleImports")) { + Subsection.Subsection = + std::make_shared(); + } else if (IO.mapTag("!Symbols")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!StringTable")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!FrameData")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!COFFSymbolRVAs")) { + Subsection.Subsection = std::make_shared(); } else { llvm_unreachable("Unexpected subsection tag!"); } @@ -213,18 +389,19 @@ void MappingTraits::mapping( Subsection.Subsection->map(IO); } -static Expected +static std::shared_ptr findChecksums(ArrayRef Subsections) { for (const auto &SS : Subsections) { if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) { - return static_cast(*SS.Subsection); + return std::static_pointer_cast(SS.Subsection); } } - return make_error(cv_error_code::no_records); + + return nullptr; } std::unique_ptr YAMLChecksumsSubsection::toCodeViewSubsection( - DebugStringTableSubsection *UseStrings, + BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings, DebugChecksumsSubsection *UseChecksums) const { assert(UseStrings && !UseChecksums); auto Result = llvm::make_unique(*UseStrings); @@ -235,7 +412,7 @@ std::unique_ptr YAMLChecksumsSubsection::toCodeViewSubsection( } std::unique_ptr YAMLLinesSubsection::toCodeViewSubsection( - DebugStringTableSubsection *UseStrings, + BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings, DebugChecksumsSubsection *UseChecksums) const { assert(UseStrings && UseChecksums); auto Result = @@ -266,7 +443,7 @@ std::unique_ptr YAMLLinesSubsection::toCodeViewSubsection( std::unique_ptr YAMLInlineeLinesSubsection::toCodeViewSubsection( - DebugStringTableSubsection *UseStrings, + BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings, DebugChecksumsSubsection *UseChecksums) const { assert(UseChecksums); auto Result = llvm::make_unique( @@ -285,6 +462,79 @@ YAMLInlineeLinesSubsection::toCodeViewSubsection( return llvm::cast(std::move(Result)); } +std::unique_ptr +YAMLCrossModuleExportsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const { + auto Result = llvm::make_unique(); + for (const auto &M : Exports) + Result->addMapping(M.Local, M.Global); + return llvm::cast(std::move(Result)); +} + +std::unique_ptr +YAMLCrossModuleImportsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const { + auto Result = llvm::make_unique(*Strings); + for (const auto &M : Imports) { + for (const auto Id : M.ImportIds) + Result->addImport(M.ModuleName, Id); + } + return llvm::cast(std::move(Result)); +} + +std::unique_ptr YAMLSymbolsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const { + auto Result = llvm::make_unique(); + for (const auto &Sym : Symbols) + Result->addSymbol( + Sym.toCodeViewSymbol(Allocator, CodeViewContainer::ObjectFile)); + return std::move(Result); +} + +std::unique_ptr +YAMLStringTableSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const { + auto Result = llvm::make_unique(); + for (const auto &Str : this->Strings) + Result->insert(Str); + return std::move(Result); +} + +std::unique_ptr YAMLFrameDataSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const { + assert(Strings); + auto Result = llvm::make_unique(); + for (const auto &YF : Frames) { + codeview::FrameData F; + F.CodeSize = YF.CodeSize; + F.Flags = YF.Flags; + F.LocalSize = YF.LocalSize; + F.MaxStackSize = YF.MaxStackSize; + F.ParamsSize = YF.ParamsSize; + F.PrologSize = YF.PrologSize; + F.RvaStart = YF.RvaStart; + F.SavedRegsSize = YF.SavedRegsSize; + F.FrameFunc = Strings->insert(YF.FrameFunc); + Result->addFrameData(F); + } + return std::move(Result); +} + +std::unique_ptr +YAMLCoffSymbolRVASubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings, + DebugChecksumsSubsection *Checksums) const { + auto Result = llvm::make_unique(); + for (const auto &RVA : RVAs) + Result->addRVA(RVA); + return std::move(Result); +} + static Expected convertOneChecksum(const DebugStringTableSubsectionRef &Strings, const FileChecksumEntry &CS) { @@ -391,20 +641,121 @@ YAMLInlineeLinesSubsection::fromCodeViewSubsection( return Result; } +Expected> +YAMLCrossModuleExportsSubsection::fromCodeViewSubsection( + const DebugCrossModuleExportsSubsectionRef &Exports) { + auto Result = std::make_shared(); + Result->Exports.assign(Exports.begin(), Exports.end()); + return Result; +} + +Expected> +YAMLCrossModuleImportsSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugCrossModuleImportsSubsectionRef &Imports) { + auto Result = std::make_shared(); + for (const auto &CMI : Imports) { + YAMLCrossModuleImport YCMI; + auto ExpectedStr = Strings.getString(CMI.Header->ModuleNameOffset); + if (!ExpectedStr) + return ExpectedStr.takeError(); + YCMI.ModuleName = *ExpectedStr; + YCMI.ImportIds.assign(CMI.Imports.begin(), CMI.Imports.end()); + Result->Imports.push_back(YCMI); + } + return Result; +} + +Expected> +YAMLSymbolsSubsection::fromCodeViewSubsection( + const DebugSymbolsSubsectionRef &Symbols) { + auto Result = std::make_shared(); + for (const auto &Sym : Symbols) { + auto S = CodeViewYAML::SymbolRecord::fromCodeViewSymbol(Sym); + if (!S) + return joinErrors(make_error( + cv_error_code::corrupt_record, + "Invalid CodeView Symbol Record in SymbolRecord " + "subsection of .debug$S while converting to YAML!"), + S.takeError()); + + Result->Symbols.push_back(*S); + } + return Result; +} + +Expected> +YAMLStringTableSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings) { + auto Result = std::make_shared(); + BinaryStreamReader Reader(Strings.getBuffer()); + StringRef S; + // First item is a single null string, skip it. + if (auto EC = Reader.readCString(S)) + return std::move(EC); + assert(S.empty()); + while (Reader.bytesRemaining() > 0) { + if (auto EC = Reader.readCString(S)) + return std::move(EC); + Result->Strings.push_back(S); + } + return Result; +} + +Expected> +YAMLFrameDataSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugFrameDataSubsectionRef &Frames) { + auto Result = std::make_shared(); + for (const auto &F : Frames) { + YAMLFrameData YF; + YF.CodeSize = F.CodeSize; + YF.Flags = F.Flags; + YF.LocalSize = F.LocalSize; + YF.MaxStackSize = F.MaxStackSize; + YF.ParamsSize = F.ParamsSize; + YF.PrologSize = F.PrologSize; + YF.RvaStart = F.RvaStart; + YF.SavedRegsSize = F.SavedRegsSize; + + auto ES = Strings.getString(F.FrameFunc); + if (!ES) + return joinErrors( + make_error( + cv_error_code::no_records, + "Could not find string for string id while mapping FrameData!"), + ES.takeError()); + YF.FrameFunc = *ES; + Result->Frames.push_back(YF); + } + return Result; +} + +Expected> +YAMLCoffSymbolRVASubsection::fromCodeViewSubsection( + const DebugSymbolRVASubsectionRef &Section) { + auto Result = std::make_shared(); + for (const auto &RVA : Section) { + Result->RVAs.push_back(RVA); + } + return Result; +} + Expected>> -llvm::CodeViewYAML::convertSubsectionList( - ArrayRef Subsections, +llvm::CodeViewYAML::toCodeViewSubsectionList( + BumpPtrAllocator &Allocator, ArrayRef Subsections, DebugStringTableSubsection &Strings) { std::vector> Result; if (Subsections.empty()) return std::move(Result); auto Checksums = findChecksums(Subsections); - if (!Checksums) - return Checksums.takeError(); - auto ChecksumsBase = Checksums->toCodeViewSubsection(&Strings, nullptr); - DebugChecksumsSubsection &CS = - llvm::cast(*ChecksumsBase); + std::unique_ptr ChecksumsBase; + if (Checksums) + ChecksumsBase = + Checksums->toCodeViewSubsection(Allocator, &Strings, nullptr); + DebugChecksumsSubsection *CS = + static_cast(ChecksumsBase.get()); for (const auto &SS : Subsections) { // We've already converted the checksums subsection, don't do it // twice. @@ -412,7 +763,42 @@ llvm::CodeViewYAML::convertSubsectionList( if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) CVS = std::move(ChecksumsBase); else - CVS = SS.Subsection->toCodeViewSubsection(&Strings, &CS); + CVS = SS.Subsection->toCodeViewSubsection(Allocator, &Strings, CS); + assert(CVS != nullptr); + Result.push_back(std::move(CVS)); + } + return std::move(Result); +} + +Expected>> +llvm::CodeViewYAML::toCodeViewSubsectionList( + BumpPtrAllocator &Allocator, ArrayRef Subsections, + std::unique_ptr &TakeStrings, + DebugStringTableSubsection *StringsRef) { + std::vector> Result; + if (Subsections.empty()) + return std::move(Result); + + auto Checksums = findChecksums(Subsections); + + std::unique_ptr ChecksumsBase; + if (Checksums) + ChecksumsBase = + Checksums->toCodeViewSubsection(Allocator, StringsRef, nullptr); + DebugChecksumsSubsection *CS = + static_cast(ChecksumsBase.get()); + for (const auto &SS : Subsections) { + // We've already converted the checksums and string table subsection, don't + // do it twice. + std::unique_ptr CVS; + if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) + CVS = std::move(ChecksumsBase); + else if (SS.Subsection->Kind == DebugSubsectionKind::StringTable) { + assert(TakeStrings && "No string table!"); + CVS = std::move(TakeStrings); + } else + CVS = SS.Subsection->toCodeViewSubsection(Allocator, StringsRef, CS); + assert(CVS != nullptr); Result.push_back(std::move(CVS)); } return std::move(Result); @@ -420,21 +806,29 @@ llvm::CodeViewYAML::convertSubsectionList( namespace { struct SubsectionConversionVisitor : public DebugSubsectionVisitor { - explicit SubsectionConversionVisitor( - const DebugStringTableSubsectionRef &Strings, - const DebugChecksumsSubsectionRef &Checksums) - : Strings(Strings), Checksums(Checksums) {} + SubsectionConversionVisitor() {} Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override; - Error visitLines(DebugLinesSubsectionRef &Lines) override; - Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) override; - Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) override; + Error visitLines(DebugLinesSubsectionRef &Lines, + const DebugSubsectionState &State) override; + Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums, + const DebugSubsectionState &State) override; + Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees, + const DebugSubsectionState &State) override; + Error visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &Checksums, + const DebugSubsectionState &State) override; + Error visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &Inlinees, + const DebugSubsectionState &State) override; + Error visitStringTable(DebugStringTableSubsectionRef &ST, + const DebugSubsectionState &State) override; + Error visitSymbols(DebugSymbolsSubsectionRef &Symbols, + const DebugSubsectionState &State) override; + Error visitFrameData(DebugFrameDataSubsectionRef &Symbols, + const DebugSubsectionState &State) override; + Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &Symbols, + const DebugSubsectionState &State) override; YAMLDebugSubsection Subsection; - -private: - const DebugStringTableSubsectionRef &Strings; - const DebugChecksumsSubsectionRef &Checksums; }; Error SubsectionConversionVisitor::visitUnknown( @@ -442,9 +836,10 @@ Error SubsectionConversionVisitor::visitUnknown( return make_error(cv_error_code::operation_unsupported); } -Error SubsectionConversionVisitor::visitLines(DebugLinesSubsectionRef &Lines) { - auto Result = - YAMLLinesSubsection::fromCodeViewSubsection(Strings, Checksums, Lines); +Error SubsectionConversionVisitor::visitLines( + DebugLinesSubsectionRef &Lines, const DebugSubsectionState &State) { + auto Result = YAMLLinesSubsection::fromCodeViewSubsection( + State.strings(), State.checksums(), Lines); if (!Result) return Result.takeError(); Subsection.Subsection = *Result; @@ -452,9 +847,9 @@ Error SubsectionConversionVisitor::visitLines(DebugLinesSubsectionRef &Lines) { } Error SubsectionConversionVisitor::visitFileChecksums( - DebugChecksumsSubsectionRef &Checksums) { - auto Result = - YAMLChecksumsSubsection::fromCodeViewSubsection(Strings, Checksums); + DebugChecksumsSubsectionRef &Checksums, const DebugSubsectionState &State) { + auto Result = YAMLChecksumsSubsection::fromCodeViewSubsection(State.strings(), + Checksums); if (!Result) return Result.takeError(); Subsection.Subsection = *Result; @@ -462,9 +857,69 @@ Error SubsectionConversionVisitor::visitFileChecksums( } Error SubsectionConversionVisitor::visitInlineeLines( - DebugInlineeLinesSubsectionRef &Inlinees) { + DebugInlineeLinesSubsectionRef &Inlinees, + const DebugSubsectionState &State) { auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection( - Strings, Checksums, Inlinees); + State.strings(), State.checksums(), Inlinees); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCrossModuleExports( + DebugCrossModuleExportsSubsectionRef &Exports, + const DebugSubsectionState &State) { + auto Result = + YAMLCrossModuleExportsSubsection::fromCodeViewSubsection(Exports); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCrossModuleImports( + DebugCrossModuleImportsSubsectionRef &Imports, + const DebugSubsectionState &State) { + auto Result = YAMLCrossModuleImportsSubsection::fromCodeViewSubsection( + State.strings(), Imports); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitStringTable( + DebugStringTableSubsectionRef &Strings, const DebugSubsectionState &State) { + auto Result = YAMLStringTableSubsection::fromCodeViewSubsection(Strings); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitSymbols( + DebugSymbolsSubsectionRef &Symbols, const DebugSubsectionState &State) { + auto Result = YAMLSymbolsSubsection::fromCodeViewSubsection(Symbols); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitFrameData( + DebugFrameDataSubsectionRef &Frames, const DebugSubsectionState &State) { + auto Result = + YAMLFrameDataSubsection::fromCodeViewSubsection(State.strings(), Frames); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCOFFSymbolRVAs( + DebugSymbolRVASubsectionRef &RVAs, const DebugSubsectionState &State) { + auto Result = YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(RVAs); if (!Result) return Result.takeError(); Subsection.Subsection = *Result; @@ -476,9 +931,25 @@ Expected YAMLDebugSubsection::fromCodeViewSubection( const DebugStringTableSubsectionRef &Strings, const DebugChecksumsSubsectionRef &Checksums, const DebugSubsectionRecord &SS) { - SubsectionConversionVisitor V(Strings, Checksums); - if (auto EC = visitDebugSubsection(SS, V)) + DebugSubsectionState State(Strings, Checksums); + SubsectionConversionVisitor V; + if (auto EC = visitDebugSubsection(SS, V, State)) return std::move(EC); return V.Subsection; } + +std::unique_ptr +llvm::CodeViewYAML::findStringTable(ArrayRef Sections) { + for (const auto &SS : Sections) { + if (SS.Subsection->Kind != DebugSubsectionKind::StringTable) + continue; + + // String Table doesn't use the allocator. + BumpPtrAllocator Allocator; + auto Result = + SS.Subsection->toCodeViewSubsection(Allocator, nullptr, nullptr); + return llvm::cast(std::move(Result)); + } + return nullptr; +} diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index bd97af3a9323..fa3f1e0b60aa 100644 --- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -47,6 +47,18 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(RegisterId) LLVM_YAML_DECLARE_ENUM_TRAITS(TrampolineType) LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal) +LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, TypeName) + +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, true) + +StringRef ScalarTraits::input(StringRef S, void *V, TypeName &T) { + return ScalarTraits::input(S, V, T.value); +} +void ScalarTraits::output(const TypeName &T, void *V, + llvm::raw_ostream &R) { + ScalarTraits::output(T.value, V, R); +} + void ScalarEnumerationTraits::enumeration(IO &io, SymbolKind &Value) { auto SymbolNames = getSymbolTypeNames(); @@ -264,6 +276,7 @@ template <> void SymbolRecordImpl::map(IO &IO) { template <> void SymbolRecordImpl::map(IO &IO) { IO.mapRequired("Type", Symbol.Type); IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("VarName", Symbol.Name); } diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp index 4e82a299a672..1302b0713d0e 100644 --- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp +++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -20,6 +20,7 @@ #include "llvm/DebugInfo/CodeView/EnumTables.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; using namespace llvm::codeview; @@ -66,7 +67,7 @@ struct LeafRecordBase { virtual ~LeafRecordBase() {} virtual void map(yaml::IO &io) = 0; - virtual CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const = 0; + virtual CVType toCodeViewRecord(TypeTableBuilder &TTB) const = 0; virtual Error fromCodeViewRecord(CVType Type) = 0; }; @@ -80,10 +81,9 @@ template struct LeafRecordImpl : public LeafRecordBase { return TypeDeserializer::deserializeAs(Type, Record); } - CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const override { - TypeTableBuilder Table(Allocator); - Table.writeKnownType(Record); - return CVType(Kind, Table.records().front()); + CVType toCodeViewRecord(TypeTableBuilder &TTB) const override { + TTB.writeKnownType(Record); + return CVType(Kind, TTB.records().back()); } mutable T Record; @@ -93,7 +93,7 @@ template <> struct LeafRecordImpl : public LeafRecordBase { explicit LeafRecordImpl(TypeLeafKind K) : LeafRecordBase(K) {} void map(yaml::IO &io) override; - CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const override; + CVType toCodeViewRecord(TypeTableBuilder &TTB) const override; Error fromCodeViewRecord(CVType Type) override; std::vector Members; @@ -440,16 +440,15 @@ Error LeafRecordImpl::fromCodeViewRecord(CVType Type) { return visitMemberRecordStream(Type.content(), V); } -CVType LeafRecordImpl::toCodeViewRecord( - BumpPtrAllocator &Allocator) const { - TypeTableBuilder TTB(Allocator); +CVType +LeafRecordImpl::toCodeViewRecord(TypeTableBuilder &TTB) const { FieldListRecordBuilder FLRB(TTB); FLRB.begin(); for (const auto &Member : Members) { Member.Member->writeTo(FLRB); } FLRB.end(true); - return CVType(Kind, TTB.records().front()); + return CVType(Kind, TTB.records().back()); } void MappingTraits::mapping(IO &io, OneMethodRecord &Record) { @@ -634,8 +633,13 @@ Expected LeafRecord::fromCodeViewRecord(CVType Type) { return make_error(cv_error_code::corrupt_record); } -CVType LeafRecord::toCodeViewRecord(BumpPtrAllocator &Allocator) const { - return Leaf->toCodeViewRecord(Allocator); +CVType LeafRecord::toCodeViewRecord(BumpPtrAllocator &Alloc) const { + TypeTableBuilder TTB(Alloc); + return Leaf->toCodeViewRecord(TTB); +} + +CVType LeafRecord::toCodeViewRecord(TypeTableBuilder &TTB) const { + return Leaf->toCodeViewRecord(TTB); } namespace llvm { diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp index 1aa1519b708b..91c928771a65 100644 --- a/lib/ObjectYAML/DWARFEmitter.cpp +++ b/lib/ObjectYAML/DWARFEmitter.cpp @@ -16,8 +16,8 @@ #include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/Support/Error.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" #include "DWARFVisitor.h" diff --git a/lib/ObjectYAML/DWARFVisitor.h b/lib/ObjectYAML/DWARFVisitor.h index 263e36220a05..81ef412eb7e6 100644 --- a/lib/ObjectYAML/DWARFVisitor.h +++ b/lib/ObjectYAML/DWARFVisitor.h @@ -13,7 +13,7 @@ #define LLVM_OBJECTYAML_DWARFVISITOR_H #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/MemoryBuffer.h" namespace llvm { diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp index 3052901da45c..70e25ea504a0 100644 --- a/lib/ObjectYAML/ELFYAML.cpp +++ b/lib/ObjectYAML/ELFYAML.cpp @@ -424,12 +424,6 @@ void ScalarBitSetTraits::bitset(IO &IO, case ELF::EM_ARM: BCase(SHF_ARM_PURECODE); break; - case ELF::EM_AMDGPU: - BCase(SHF_AMDGPU_HSA_GLOBAL); - BCase(SHF_AMDGPU_HSA_READONLY); - BCase(SHF_AMDGPU_HSA_CODE); - BCase(SHF_AMDGPU_HSA_AGENT); - break; case ELF::EM_HEXAGON: BCase(SHF_HEX_GPREL); break; @@ -513,35 +507,35 @@ void ScalarEnumerationTraits::enumeration( #define ELF_RELOC(X, Y) IO.enumCase(Value, #X, ELF::X); switch (Object->Header.Machine) { case ELF::EM_X86_64: -#include "llvm/Support/ELFRelocs/x86_64.def" +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" break; case ELF::EM_MIPS: -#include "llvm/Support/ELFRelocs/Mips.def" +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" break; case ELF::EM_HEXAGON: -#include "llvm/Support/ELFRelocs/Hexagon.def" +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" break; case ELF::EM_386: case ELF::EM_IAMCU: -#include "llvm/Support/ELFRelocs/i386.def" +#include "llvm/BinaryFormat/ELFRelocs/i386.def" break; case ELF::EM_AARCH64: -#include "llvm/Support/ELFRelocs/AArch64.def" +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" break; case ELF::EM_ARM: -#include "llvm/Support/ELFRelocs/ARM.def" +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" break; case ELF::EM_RISCV: -#include "llvm/Support/ELFRelocs/RISCV.def" +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" break; case ELF::EM_LANAI: -#include "llvm/Support/ELFRelocs/Lanai.def" +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" break; case ELF::EM_AMDGPU: -#include "llvm/Support/ELFRelocs/AMDGPU.def" +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" break; case ELF::EM_BPF: -#include "llvm/Support/ELFRelocs/BPF.def" +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" break; default: llvm_unreachable("Unsupported architecture"); diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp index 6b0e4e3762d0..461684827872 100644 --- a/lib/ObjectYAML/MachOYAML.cpp +++ b/lib/ObjectYAML/MachOYAML.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/MachOYAML.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" -#include "llvm/Support/MachO.h" #include // For memcpy, memset and strnlen. @@ -252,7 +252,7 @@ void MappingTraits::mapping( break; switch (LoadCommand.Data.load_command_data.cmd) { -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" } IO.mapOptional("PayloadBytes", LoadCommand.PayloadBytes); IO.mapOptional("ZeroPadBytes", LoadCommand.ZeroPadBytes, (uint64_t)0ull); diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp index 74581c1ecaac..4b7154ebb7c1 100644 --- a/lib/ObjectYAML/ObjectYAML.cpp +++ b/lib/ObjectYAML/ObjectYAML.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ObjectYAML/YAML.h" #include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/YAML.h" using namespace llvm; using namespace yaml; diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 910d32f16af9..353d027f4e11 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -366,7 +366,7 @@ void ScalarEnumerationTraits::enumeration( void ScalarEnumerationTraits::enumeration( IO &IO, WasmYAML::RelocType &Type) { #define WASM_RELOC(name, value) IO.enumCase(Type, #name, wasm::name); -#include "llvm/Support/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" #undef WASM_RELOC } diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp index 3e8a1d802314..e416df6a38dc 100644 --- a/lib/Option/Arg.cpp +++ b/lib/Option/Arg.cpp @@ -12,8 +12,8 @@ #include "llvm/ADT/Twine.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::opt; diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 17c60348633c..1f638e768307 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -164,6 +164,10 @@ static cl::opt EnableGVNHoist( "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt EnableGVNSink( + "enable-npm-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); + static Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); @@ -314,6 +318,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (EnableGVNHoist) FPM.addPass(GVNHoistPass()); + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); + } + // Speculative execution if the target has divergent branches; otherwise nop. FPM.addPass(SpeculativeExecutionPass()); diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index 23999a5312c7..015b3c6c2021 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" @@ -19,7 +20,6 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ProfileData/Coverage/CoverageMappingReader.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/Debug.h" diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index f131be2cba49..6fe93530da21 100644 --- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index a2b7c94f9dec..c9b82c303e33 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProf.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -29,7 +30,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -45,8 +45,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 856f793363f7..d9f599f400da 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 6b7bd3b2fc0a..b3402a6ea956 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index b05efa7417b9..b45026140c99 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -18,10 +18,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" diff --git a/lib/Support/AMDGPUCodeObjectMetadata.cpp b/lib/Support/AMDGPUCodeObjectMetadata.cpp new file mode 100644 index 000000000000..a00e371415a3 --- /dev/null +++ b/lib/Support/AMDGPUCodeObjectMetadata.cpp @@ -0,0 +1,218 @@ +//===--- AMDGPUCodeObjectMetadata.cpp ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata definitions and in-memory +/// representations. +/// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::CodeObject; + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) + +namespace llvm { +namespace yaml { + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AccessQualifier &EN) { + YIO.enumCase(EN, "Default", AccessQualifier::Default); + YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); + YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); + YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { + YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); + YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); + YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); + YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); + YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); + YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueKind &EN) { + YIO.enumCase(EN, "ByValue", ValueKind::ByValue); + YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); + YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); + YIO.enumCase(EN, "Sampler", ValueKind::Sampler); + YIO.enumCase(EN, "Image", ValueKind::Image); + YIO.enumCase(EN, "Pipe", ValueKind::Pipe); + YIO.enumCase(EN, "Queue", ValueKind::Queue); + YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); + YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); + YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); + YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); + YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); + YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); + YIO.enumCase(EN, "HiddenCompletionAction", + ValueKind::HiddenCompletionAction); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueType &EN) { + YIO.enumCase(EN, "Struct", ValueType::Struct); + YIO.enumCase(EN, "I8", ValueType::I8); + YIO.enumCase(EN, "U8", ValueType::U8); + YIO.enumCase(EN, "I16", ValueType::I16); + YIO.enumCase(EN, "U16", ValueType::U16); + YIO.enumCase(EN, "F16", ValueType::F16); + YIO.enumCase(EN, "I32", ValueType::I32); + YIO.enumCase(EN, "U32", ValueType::U32); + YIO.enumCase(EN, "F32", ValueType::F32); + YIO.enumCase(EN, "I64", ValueType::I64); + YIO.enumCase(EN, "U64", ValueType::U64); + YIO.enumCase(EN, "F64", ValueType::F64); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { + YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, + MD.mReqdWorkGroupSize, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, + MD.mWorkGroupSizeHint, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, + MD.mVecTypeHint, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { + YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); + YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); + YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); + YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); + YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, + uint32_t(0)); + YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, + AddressSpaceQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); + YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); + YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); + YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); + YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, + MD.mKernargSegmentSize, uint64_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, + MD.mWorkgroupGroupSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, + MD.mWorkitemPrivateSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, + MD.mWavefrontNumSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, + MD.mWorkitemNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, + MD.mKernargSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, + MD.mGroupSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, + MD.mPrivateSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, + MD.mWavefrontSize, uint8_t(0)); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { + YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, + MD.mDebuggerABIVersion, std::vector()); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, + MD.mReservedNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, + MD.mReservedFirstVGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, + MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, + MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Metadata &MD) { + YIO.mapRequired(Kernel::Key::Name, MD.mName); + YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); + YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, + std::vector()); + if (!MD.mAttrs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); + if (!MD.mArgs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Args, MD.mArgs); + if (!MD.mCodeProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); + if (!MD.mDebugProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, CodeObject::Metadata &MD) { + YIO.mapRequired(Key::Version, MD.mVersion); + YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); + if (!MD.mKernels.empty() || !YIO.outputting()) + YIO.mapOptional(Key::Kernels, MD.mKernels); + } +}; + +} // end namespace yaml + +namespace AMDGPU { +namespace CodeObject { + +/* static */ +std::error_code Metadata::fromYamlString( + std::string YamlString, Metadata &CodeObjectMetadata) { + yaml::Input YamlInput(YamlString); + YamlInput >> CodeObjectMetadata; + return YamlInput.error(); +} + +/* static */ +std::error_code Metadata::toYamlString( + Metadata CodeObjectMetadata, std::string &YamlString) { + raw_string_ostream YamlStream(YamlString); + yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); + YamlOutput << CodeObjectMetadata; + return std::error_code(); +} + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp index 63e800a5b78b..a9a0c1d1a4d3 100644 --- a/lib/Support/ARMAttributeParser.cpp +++ b/lib/Support/ARMAttributeParser.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/ARMAttributeParser.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/ScopedPrinter.h" diff --git a/lib/Support/ARMBuildAttrs.cpp b/lib/Support/ARMBuildAttrs.cpp index 134ef8b587b7..8f18e9eb24ed 100644 --- a/lib/Support/ARMBuildAttrs.cpp +++ b/lib/Support/ARMBuildAttrs.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ARMBuildAttributes.h" using namespace llvm; diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 80550e2b46a7..55910c489faf 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -18,6 +18,8 @@ using namespace llvm; #if defined(_MSC_VER) #include + +// We must include windows.h after Intrin.h. #include #undef MemoryFence #endif diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index a12ba4fbfda8..0a8e3897cce9 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -30,6 +30,7 @@ elseif( CMAKE_HOST_UNIX ) endif( MSVC OR MINGW ) add_llvm_library(LLVMSupport + AMDGPUCodeObjectMetadata.cpp APFloat.cpp APInt.cpp APSInt.cpp @@ -57,7 +58,6 @@ add_llvm_library(LLVMSupport DebugCounter.cpp DeltaAlgorithm.cpp DAGDeltaAlgorithm.cpp - Dwarf.cpp Error.cpp ErrorHandling.cpp FileUtilities.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 34345901eab1..de0ca940b405 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -2042,9 +2042,9 @@ void CommandLineParser::printOptionValues() { Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions); } -static void (*OverrideVersionPrinter)() = nullptr; +static VersionPrinterTy OverrideVersionPrinter = nullptr; -static std::vector *ExtraVersionPrinters = nullptr; +static std::vector *ExtraVersionPrinters = nullptr; namespace { class VersionPrinter { @@ -2084,7 +2084,7 @@ public: return; if (OverrideVersionPrinter != nullptr) { - (*OverrideVersionPrinter)(); + OverrideVersionPrinter(outs()); exit(0); } print(); @@ -2093,10 +2093,8 @@ public: // information. if (ExtraVersionPrinters != nullptr) { outs() << '\n'; - for (std::vector::iterator I = ExtraVersionPrinters->begin(), - E = ExtraVersionPrinters->end(); - I != E; ++I) - (*I)(); + for (auto I : *ExtraVersionPrinters) + I(outs()); } exit(0); @@ -2134,11 +2132,11 @@ void cl::PrintHelpMessage(bool Hidden, bool Categorized) { /// Utility function for printing version number. void cl::PrintVersionMessage() { VersionPrinterInstance.print(); } -void cl::SetVersionPrinter(void (*func)()) { OverrideVersionPrinter = func; } +void cl::SetVersionPrinter(VersionPrinterTy func) { OverrideVersionPrinter = func; } -void cl::AddExtraVersionPrinter(void (*func)()) { +void cl::AddExtraVersionPrinter(VersionPrinterTy func) { if (!ExtraVersionPrinters) - ExtraVersionPrinters = new std::vector; + ExtraVersionPrinters = new std::vector; ExtraVersionPrinters->push_back(func); } diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp index aa9507c189ed..e56854a3ae42 100644 --- a/lib/Support/ConvertUTF.cpp +++ b/lib/Support/ConvertUTF.cpp @@ -46,14 +46,12 @@ ------------------------------------------------------------------------ */ - #include "llvm/Support/ConvertUTF.h" #ifdef CVTUTF_DEBUG #include #endif #include - /* * This code extensively uses fall-through switches. * Keep the compiler from warning about that. diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp index 217cedb24df6..6cb4f6376250 100644 --- a/lib/Support/ConvertUTFWrapper.cpp +++ b/lib/Support/ConvertUTFWrapper.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ConvertUTF.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SwapByteOrder.h" #include diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index 3ba2a1277d05..10be9b391b49 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Errno.h" -#include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp index 4730c0b26ba0..bb02c03ff2b6 100644 --- a/lib/Support/Error.cpp +++ b/lib/Support/Error.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/ManagedStatic.h" #include - using namespace llvm; namespace { diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index c01659604444..a9f4409f5dde 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 8be9879fbc24..3ee3af7731e6 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -15,15 +15,15 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" #include #include #include -#include -#include #include #include +#include +#include #if LLVM_ON_WIN32 #include #endif diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp index bdbf1d677938..545a64cfc767 100644 --- a/lib/Support/MD5.cpp +++ b/lib/Support/MD5.cpp @@ -37,11 +37,11 @@ * compile-time configuration. */ +#include "llvm/Support/MD5.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index c8d3844d0c96..bdd02105f6f0 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/Support/Mutex.h" +#include "llvm/Config/config.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 80bef558258d..e58f856ca244 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -13,12 +13,12 @@ #include "llvm/Support/Path.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/COFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/Process.h" #include #include @@ -1027,178 +1027,6 @@ void directory_entry::replace_filename(const Twine &filename, file_status st) { Status = st; } -template -static bool startswith(StringRef Magic, const char (&S)[N]) { - return Magic.startswith(StringRef(S, N - 1)); -} - -/// @brief Identify the magic in magic. -file_magic identify_magic(StringRef Magic) { - if (Magic.size() < 4) - return file_magic::unknown; - switch ((unsigned char)Magic[0]) { - case 0x00: { - // COFF bigobj, CL.exe's LTO object file, or short import library file - if (startswith(Magic, "\0\0\xFF\xFF")) { - size_t MinSize = offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); - if (Magic.size() < MinSize) - return file_magic::coff_import_library; - - const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); - if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0) - return file_magic::coff_object; - if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0) - return file_magic::coff_cl_gl_object; - return file_magic::coff_import_library; - } - // Windows resource file - if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF")) - return file_magic::windows_resource; - // 0x0000 = COFF unknown machine type - if (Magic[1] == 0) - return file_magic::coff_object; - if (startswith(Magic, "\0asm")) - return file_magic::wasm_object; - break; - } - case 0xDE: // 0x0B17C0DE = BC wraper - if (startswith(Magic, "\xDE\xC0\x17\x0B")) - return file_magic::bitcode; - break; - case 'B': - if (startswith(Magic, "BC\xC0\xDE")) - return file_magic::bitcode; - break; - case '!': - if (startswith(Magic, "!\n") || startswith(Magic, "!\n")) - return file_magic::archive; - break; - - case '\177': - if (startswith(Magic, "\177ELF") && Magic.size() >= 18) { - bool Data2MSB = Magic[5] == 2; - unsigned high = Data2MSB ? 16 : 17; - unsigned low = Data2MSB ? 17 : 16; - if (Magic[high] == 0) { - switch (Magic[low]) { - default: return file_magic::elf; - case 1: return file_magic::elf_relocatable; - case 2: return file_magic::elf_executable; - case 3: return file_magic::elf_shared_object; - case 4: return file_magic::elf_core; - } - } - // It's still some type of ELF file. - return file_magic::elf; - } - break; - - case 0xCA: - if (startswith(Magic, "\xCA\xFE\xBA\xBE") || - startswith(Magic, "\xCA\xFE\xBA\xBF")) { - // This is complicated by an overlap with Java class files. - // See the Mach-O section in /usr/share/file/magic for details. - if (Magic.size() >= 8 && Magic[7] < 43) - return file_magic::macho_universal_binary; - } - break; - - // The two magic numbers for mach-o are: - // 0xfeedface - 32-bit mach-o - // 0xfeedfacf - 64-bit mach-o - case 0xFE: - case 0xCE: - case 0xCF: { - uint16_t type = 0; - if (startswith(Magic, "\xFE\xED\xFA\xCE") || - startswith(Magic, "\xFE\xED\xFA\xCF")) { - /* Native endian */ - size_t MinSize; - if (Magic[3] == char(0xCE)) - MinSize = sizeof(MachO::mach_header); - else - MinSize = sizeof(MachO::mach_header_64); - if (Magic.size() >= MinSize) - type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; - } else if (startswith(Magic, "\xCE\xFA\xED\xFE") || - startswith(Magic, "\xCF\xFA\xED\xFE")) { - /* Reverse endian */ - size_t MinSize; - if (Magic[0] == char(0xCE)) - MinSize = sizeof(MachO::mach_header); - else - MinSize = sizeof(MachO::mach_header_64); - if (Magic.size() >= MinSize) - type = Magic[15] << 24 | Magic[14] << 12 |Magic[13] << 8 | Magic[12]; - } - switch (type) { - default: break; - case 1: return file_magic::macho_object; - case 2: return file_magic::macho_executable; - case 3: return file_magic::macho_fixed_virtual_memory_shared_lib; - case 4: return file_magic::macho_core; - case 5: return file_magic::macho_preload_executable; - case 6: return file_magic::macho_dynamically_linked_shared_lib; - case 7: return file_magic::macho_dynamic_linker; - case 8: return file_magic::macho_bundle; - case 9: return file_magic::macho_dynamically_linked_shared_lib_stub; - case 10: return file_magic::macho_dsym_companion; - case 11: return file_magic::macho_kext_bundle; - } - break; - } - case 0xF0: // PowerPC Windows - case 0x83: // Alpha 32-bit - case 0x84: // Alpha 64-bit - case 0x66: // MPS R4000 Windows - case 0x50: // mc68K - case 0x4c: // 80386 Windows - case 0xc4: // ARMNT Windows - if (Magic[1] == 0x01) - return file_magic::coff_object; - LLVM_FALLTHROUGH; - - case 0x90: // PA-RISC Windows - case 0x68: // mc68K Windows - if (Magic[1] == 0x02) - return file_magic::coff_object; - break; - - case 'M': // Possible MS-DOS stub on Windows PE file - if (startswith(Magic, "MZ")) { - uint32_t off = read32le(Magic.data() + 0x3c); - // PE/COFF file, either EXE or DLL. - if (off < Magic.size() && - memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) - return file_magic::pecoff_executable; - } - break; - - case 0x64: // x86-64 Windows. - if (Magic[1] == char(0x86)) - return file_magic::coff_object; - break; - - default: - break; - } - return file_magic::unknown; -} - -std::error_code identify_magic(const Twine &Path, file_magic &Result) { - int FD; - if (std::error_code EC = openFileForRead(Path, FD)) - return EC; - - char Buffer[32]; - int Length = read(FD, Buffer, sizeof(Buffer)); - if (close(FD) != 0 || Length < 0) - return std::error_code(errno, std::generic_category()); - - Result = identify_magic(StringRef(Buffer, Length)); - return std::error_code(); -} - std::error_code directory_entry::status(file_status &result) const { return fs::status(Path, result, FollowSymlinks); } diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index abf61b73a70d..a18e9cc50040 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -15,7 +15,7 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm-c/ErrorHandling.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/Compiler.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Watchdog.h" diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index 290c30f4968f..caec993ee165 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Process.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" #include "llvm/Support/Program.h" using namespace llvm; diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp index 6c9781c4e2d6..83c6d1d52b4c 100644 --- a/lib/Support/RWMutex.cpp +++ b/lib/Support/RWMutex.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/Support/RWMutex.h" +#include "llvm/Config/config.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/lib/Support/SHA1.cpp b/lib/Support/SHA1.cpp index 0eefd998cd75..20f41c5ff447 100644 --- a/lib/Support/SHA1.cpp +++ b/lib/Support/SHA1.cpp @@ -15,9 +15,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Host.h" #include "llvm/Support/SHA1.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Host.h" using namespace llvm; #include diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp index 57f36bf175b3..256a22dee87b 100644 --- a/lib/Support/Signals.cpp +++ b/lib/Support/Signals.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Signals.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" @@ -23,18 +24,23 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" -#include "llvm/Support/Signals.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Options.h" #include -namespace llvm { - //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system //=== independent code. //===----------------------------------------------------------------------===// +using namespace llvm; + +static cl::opt + DisableSymbolication("disable-symbolication", + cl::desc("Disable symbolizing crash backtraces."), + cl::init(false), cl::Hidden); + static ManagedStatic>> CallBacksToRun; void sys::RunSignalHandlers() { @@ -44,9 +50,6 @@ void sys::RunSignalHandlers() { I.first(I.second); CallBacksToRun->clear(); } -} - -using namespace llvm; static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, @@ -70,6 +73,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, int Depth, llvm::raw_ostream &OS) { + if (DisableSymbolication) + return false; + // Don't recursively invoke the llvm-symbolizer binary. if (Argv0.find("llvm-symbolizer") != std::string::npos) return false; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 5199fad7d9e9..b0609d4fe047 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -13,18 +13,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/SourceMgr.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/Locale.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index df524b352351..05886eaa8aee 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -15,12 +15,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SpecialCaseList.h" -#include "llvm/Support/TrigramIndex.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/TrigramIndex.h" #include #include #include diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 0c50dfd27d61..72ca22806c43 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -30,8 +30,8 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index 3e2420f67760..b2f42dfcc04d 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; /// StrInStrNoCase - Portable version of strcasestr. Locates the first diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index bed9ed64f802..b5c283253117 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -114,7 +114,7 @@ static int TargetArraySortFn(const std::pair *LHS, return LHS->first.compare(RHS->first); } -void TargetRegistry::printRegisteredTargetsForVersion() { +void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) { std::vector > Targets; size_t Width = 0; for (const auto &T : TargetRegistry::targets()) { @@ -123,7 +123,6 @@ void TargetRegistry::printRegisteredTargetsForVersion() { } array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn); - raw_ostream &OS = outs(); OS << " Registered Targets:\n"; for (unsigned i = 0, e = Targets.size(); i != e; ++i) { OS << " " << Targets[i].first; diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp index 9da1603080a2..9a75c02b351f 100644 --- a/lib/Support/ThreadLocal.cpp +++ b/lib/Support/ThreadLocal.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/ThreadLocal.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ThreadLocal.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index dec6baf7bf47..3386f2660f31 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // This ugly hack is brought to you courtesy of constructor/destructor ordering diff --git a/lib/Support/TrigramIndex.cpp b/lib/Support/TrigramIndex.cpp index 85ab5287566b..721763c88525 100644 --- a/lib/Support/TrigramIndex.cpp +++ b/lib/Support/TrigramIndex.cpp @@ -18,9 +18,9 @@ #include "llvm/Support/TrigramIndex.h" #include "llvm/ADT/SmallVector.h" -#include #include #include +#include using namespace llvm; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index f7b7ad89e959..320aede79fbb 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -12,8 +12,8 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/Host.h" +#include "llvm/Support/TargetParser.h" #include using namespace llvm; @@ -877,6 +877,10 @@ std::string Triple::normalize(StringRef Str) { } } + // SUSE uses "gnueabi" to mean "gnueabihf" + if (Vendor == Triple::SUSE && Environment == llvm::Triple::GNUEABI) + Components[3] = "gnueabihf"; + if (OS == Triple::Win32) { Components.resize(4); Components[2] = "windows"; @@ -1484,6 +1488,21 @@ bool Triple::isLittleEndian() const { } bool Triple::isCompatibleWith(const Triple &Other) const { + // ARM and Thumb triples are compatible, if subarch, vendor and OS match. + if ((getArch() == Triple::thumb && Other.getArch() == Triple::arm) || + (getArch() == Triple::arm && Other.getArch() == Triple::thumb) || + (getArch() == Triple::thumbeb && Other.getArch() == Triple::armeb) || + (getArch() == Triple::armeb && Other.getArch() == Triple::thumbeb)) { + if (getVendor() == Triple::Apple) + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS(); + else + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS() && + getEnvironment() == Other.getEnvironment() && + getObjectFormat() == Other.getObjectFormat(); + } + // If vendor is apple, ignore the version number. if (getVendor() == Triple::Apple) return getArch() == Other.getArch() && getSubArch() == Other.getSubArch() && diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc index a0526fa2c1b8..aad77f19c35a 100644 --- a/lib/Support/Unix/DynamicLibrary.inc +++ b/lib/Support/Unix/DynamicLibrary.inc @@ -15,7 +15,8 @@ #include DynamicLibrary::HandleSet::~HandleSet() { - for (void *Handle : Handles) + // Close the libraries in reverse order. + for (void *Handle : llvm::reverse(Handles)) ::dlclose(Handle); if (Process) ::dlclose(Process); @@ -101,10 +102,10 @@ static void *DoSearch(const char* SymbolName) { #define EXPLICIT_SYMBOL(SYM) \ if (!strcmp(SymbolName, #SYM)) return &SYM -// On linux we have a weird situation. The stderr/out/in symbols are both +// Under glibc we have a weird situation. The stderr/out/in symbols are both // macros and global variables because of standards requirements. So, we // boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. -#if defined(__linux__) and !defined(__ANDROID__) +#if defined(__GLIBC__) { EXPLICIT_SYMBOL(stderr); EXPLICIT_SYMBOL(stdout); diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index ce638d453c19..b6774692595b 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -75,8 +75,8 @@ #define STATVFS_F_FRSIZE(vfs) vfs.f_frsize #else #if defined(__OpenBSD__) || defined(__FreeBSD__) -#include #include +#include #elif defined(__linux__) #if defined(HAVE_LINUX_MAGIC_H) #include diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 88ad21e9806e..aaf760c5b616 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -15,9 +15,9 @@ #include "Unix.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Demangle/Demangle.h" -#include "llvm/Support/Format.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" diff --git a/lib/Support/Unix/Threading.inc b/lib/Support/Unix/Threading.inc index 407b194e1b6a..267af388ecdb 100644 --- a/lib/Support/Unix/Threading.inc +++ b/lib/Support/Unix/Threading.inc @@ -26,19 +26,19 @@ #endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include #include #include -#include #include #endif #if defined(__NetBSD__) -#include // For _lwp_self() +#include // For _lwp_self() #endif #if defined(__linux__) -#include // For syscall() -#include // For syscall codes +#include // For syscall codes +#include // For syscall() #endif namespace { diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 0b54b5dfdbc5..caf1a0a658de 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -23,7 +23,7 @@ DynamicLibrary::HandleSet::~HandleSet() { - for (void *Handle : Handles) + for (void *Handle : llvm::reverse(Handles)) FreeLibrary(HMODULE(Handle)); // 'Process' should not be released on Windows. diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h index c358b99ab96a..d4599dca044e 100644 --- a/lib/Support/Windows/WindowsSupport.h +++ b/lib/Support/Windows/WindowsSupport.h @@ -45,7 +45,9 @@ #include #include #include -#include // Must be included after windows.h + +// Must be included after windows.h +#include /// Determines if the program is running on Windows 8 or newer. This /// reimplements one of the helpers in the Windows 8.1 SDK, which are intended diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index f1496393e55e..01ae3214453d 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLParser.h" +#include "llvm/ADT/AllocatorList.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/ADT/AllocatorList.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp index 0c83da65e19e..7e510f0c2fdc 100644 --- a/lib/TableGen/StringMatcher.cpp +++ b/lib/TableGen/StringMatcher.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/TableGen/StringMatcher.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TableGen/StringMatcher.h" #include #include #include diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 981fd22c213c..5ce57926cc03 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64.h" #include "AArch64MCInstLower.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "InstPrinter/AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" @@ -35,11 +35,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCLinkerOptimizationHint.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 30e2b2310456..544f67433fd5 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-dead-defs" diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 33fec74998d6..160107cd7e2b 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 9ac7ecb9cdb4..e8fcf1a0e9b7 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2827,7 +2827,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { return false; EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); - if (SrcVT == MVT::f128) + if (SrcVT == MVT::f128 || SrcVT == MVT::f16) return false; unsigned Opc; @@ -2854,6 +2854,10 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; + // Let regular ISEL handle FP16 + if (DestVT == MVT::f16) + return false; + assert((DestVT == MVT::f32 || DestVT == MVT::f64) && "Unexpected value type."); diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index b18fb30eb2d4..8c2c0a564c30 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2566,7 +2566,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { // pstatefield for the MSR (immediate) instruction, we also require that an // immediate value has been provided as an argument, we know that this is // the case as it has been ensured by semantic checking. - auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());; + auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); if (PMapper) { assert (isa(N->getOperand(2)) && "Expected a constant integer expression."); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index f798010906cc..059556a560c0 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AArch64ISelLowering.h" #include "AArch64CallingConvention.h" #include "AArch64MachineFunctionInfo.h" -#include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" @@ -22,9 +22,9 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -51,10 +51,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index faf39be9b41e..eea012382150 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 6e6daf812295..01196817f311 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" #include "llvm/Target/TargetOpcodes.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 976498aa70d6..9243eb91cc1a 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -16,10 +16,10 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index 038162c6f54a..fe4ef4b40ece 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -17,8 +17,8 @@ #define DEBUG_TYPE "aarch64-pbqp" -#include "AArch64.h" #include "AArch64PBQPRegAlloc.h" +#include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 789270c2a34b..9b3899e0681c 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -15,13 +15,13 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LowLevelType.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 12a2e9a867f0..4bc2c060a068 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AArch64TargetObjectFile.h" #include "AArch64TargetMachine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 43569af04347..a4328682b93c 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -9,8 +9,8 @@ #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 449d732a8d44..e841fb894519 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index ebf05ae303dd..43a6fa9ce089 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -11,8 +11,9 @@ #include "AArch64RegisterInfo.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAssembler.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -22,7 +23,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 10e7241da709..f7dda92fb551 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -15,11 +15,11 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 271263507ae1..031aa8b81e35 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -30,7 +31,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 3d296ba4806b..19b2576f6895 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MathExtras.h" #include #include diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index b50e8d1d659e..6ab2b9ef0459 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -447,6 +447,16 @@ class SubtargetFeatureISAVersion ; +def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0, + [FeatureSouthernIslands, + FeatureFastFMAF32, + HalfRate64Ops, + FeatureLDSBankCount32]>; + +def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1, + [FeatureSouthernIslands, + FeatureLDSBankCount32]>; + def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0, [FeatureSeaIslands, FeatureLDSBankCount32]>; @@ -461,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2, [FeatureSeaIslands, FeatureLDSBankCount16]>; +def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3, + [FeatureSeaIslands, + FeatureLDSBankCount16]>; + def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0, [FeatureVolcanicIslands, FeatureLDSBankCount32, @@ -489,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, FeatureLDSBankCount16, FeatureXNACK]>; -def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>; -def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>; +def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; + +def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; //===----------------------------------------------------------------------===// // Debugger related subtarget features. diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 3c99f48e818a..faa424eb0a64 100644 --- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -10,15 +10,15 @@ /// This is the AMGPU address space based alias analysis pass. //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 91b3649f5c39..3c788fa1dcea 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -19,8 +19,8 @@ #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0959014812d8..83ad1a5c6ee3 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,25 +17,25 @@ // #include "AMDGPUAsmPrinter.h" -#include "AMDGPUTargetMachine.h" -#include "MCTargetDesc/AMDGPUTargetStreamer.h" -#include "InstPrinter/AMDGPUInstPrinter.h" -#include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" +#include "InstPrinter/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" #include "SIDefines.h" -#include "SIMachineFunctionInfo.h" #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index e5adeeb465e1..0a58ce06704d 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -15,8 +15,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H -#include "AMDKernelCodeT.h" #include "AMDGPU.h" +#include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index e67ae092fdda..515cc07dd449 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -18,8 +18,8 @@ #include "AMDGPUISelLowering.h" #include "AMDGPUSubtarget.h" #include "SIISelLowering.h" -#include "SIRegisterInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index d923cb117c12..b312dbc8d14d 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -25,13 +25,13 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 19fce064783d..251c2f9bb25a 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterInfo.h" -#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUSubtarget.h" #include "SIDefines.h" -#include "SIInstrInfo.h" -#include "SIRegisterInfo.h" #include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 723e8a7b54e2..5586b513b5fc 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -21,6 +21,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "R600MachineFunctionInfo.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,7 +31,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/KnownBits.h" -#include "SIInstrInfo.h" using namespace llvm; static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 12caa5118342..41cc7d7093ec 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -17,8 +17,8 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #include "AMDGPU.h" -#include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index c87102e55dfb..ef845f44d365 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #include "AMDGPU.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 267f4807a788..b889788c3426 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -14,10 +14,10 @@ #include "AMDGPULegalizerInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/Target/TargetOpcodes.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; @@ -47,12 +47,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() { setAction({G_GEP, P2}, Legal); setAction({G_GEP, 1, S64}, Legal); + setAction({G_ICMP, S1}, Legal); + setAction({G_ICMP, 1, S32}, Legal); + setAction({G_LOAD, P1}, Legal); setAction({G_LOAD, P2}, Legal); setAction({G_LOAD, S32}, Legal); setAction({G_LOAD, 1, P1}, Legal); setAction({G_LOAD, 1, P2}, Legal); + setAction({G_SELECT, S32}, Legal); + setAction({G_SELECT, 1, S1}, Legal); + setAction({G_STORE, S32}, Legal); setAction({G_STORE, 1, P1}, Legal); diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index f1ef6281c90f..63dd0d726d91 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -38,7 +38,6 @@ using namespace llvm; #include "AMDGPUGenMCPseudoLowering.inc" - AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st, const AsmPrinter &ap): Ctx(ctx), ST(st), AP(ap) { } diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 6d2785ba1c60..2071b6f157cd 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 8bfeb67ad4ec..99bb61b21db0 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -10,8 +10,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 07f92918a43f..625c9b77e2de 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -33,11 +33,11 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -319,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) { switch (Inst->getOpcode()) { case Instruction::Load: { LoadInst *LI = cast(Inst); - return !LI->isVolatile(); + // Currently only handle the case where the Pointer Operand is a GEP so check for that case. + return isa(LI->getPointerOperand()) && !LI->isVolatile(); } case Instruction::BitCast: case Instruction::AddrSpaceCast: return true; case Instruction::Store: { - // Must be the stored pointer operand, not a stored value. + // Must be the stored pointer operand, not a stored value, plus + // since it should be canonical form, the User should be a GEP. StoreInst *SI = cast(Inst); - return (SI->getPointerOperand() == User) && !SI->isVolatile(); + return (SI->getPointerOperand() == User) && isa(User) && !SI->isVolatile(); } default: return false; @@ -341,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { // FIXME: There is no reason why we can't support larger arrays, we // are just being conservative for now. + // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these + // could also be promoted but we don't currently handle this case if (!AllocaTy || AllocaTy->getElementType()->isVectorTy() || + AllocaTy->getElementType()->isArrayTy() || AllocaTy->getNumElements() > 4 || AllocaTy->getNumElements() < 2) { DEBUG(dbgs() << " Cannot convert type to vector\n"); @@ -390,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { switch (Inst->getOpcode()) { case Instruction::Load: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(0); + Value *Ptr = cast(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); @@ -403,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { case Instruction::Store: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(1); + StoreInst *SI = cast(Inst); + Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *NewVecValue = Builder.CreateInsertElement(VecValue, - Inst->getOperand(0), + SI->getValueOperand(), Index); Builder.CreateStore(NewVecValue, BitCast); Inst->eraseFromParent(); diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp new file mode 100644 index 000000000000..36d88f52910d --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp @@ -0,0 +1,353 @@ +//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===// + +#ifdef AMDGPU_REG_ASM_NAMES + +static const char *const VGPR32RegNames[] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", + "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", + "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", + "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", + "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", + "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", + "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", + "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", + "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", + "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", + "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", + "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", + "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", + "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", + "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", + "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", + "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", + "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", + "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", + "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", + "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", + "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", + "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", + "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", + "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", + "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", + "v252", "v253", "v254", "v255" +}; + +static const char *const SGPR32RegNames[] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", + "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", + "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", + "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", + "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", + "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", + "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", + "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", + "s100", "s101", "s102", "s103" +}; + +static const char *const VGPR64RegNames[] = { + "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]", + "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]", + "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]", + "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]", + "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]", + "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]", + "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]", + "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]", + "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]", + "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]", + "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]", + "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]", + "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]", + "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]", + "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]", + "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]", + "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]", + "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]", + "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]", + "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]", + "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]", + "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]", + "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]", + "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]", + "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]", + "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]", + "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]", + "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]", + "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]", + "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]", + "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]", + "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]", + "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]", + "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]", + "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]", + "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]", + "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]", + "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]", + "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]", + "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]", + "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]", + "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]", + "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]", + "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]", + "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]", + "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]", + "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]", + "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]", + "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]", + "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]", + "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]" +}; + +static const char *const VGPR96RegNames[] = { + "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]", + "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]", + "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]", + "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]", + "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]", + "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]", + "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]", + "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]", + "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]", + "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]", + "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]", + "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]", + "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]", + "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]", + "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]", + "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]", + "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]", + "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]", + "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]", + "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]", + "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]", + "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]", + "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]", + "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]", + "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]", + "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]", + "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]", + "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]", + "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]", + "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]", + "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]", + "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]", + "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]", + "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]", + "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]", + "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]", + "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]", + "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]", + "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]", + "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]", + "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]", + "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]", + "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]", + "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]", + "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]", + "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]", + "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]", + "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]", + "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]", + "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]", + "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]" +}; + +static const char *const VGPR128RegNames[] = { + "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]", + "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]", + "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]", + "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]", + "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]", + "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]", + "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]", + "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]", + "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]", + "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]", + "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]", + "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]", + "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]", + "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]", + "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]", + "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]", + "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]", + "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]", + "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]", + "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]", + "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]", + "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]", + "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]", + "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]", + "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]", + "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]", + "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]", + "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]", + "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]", + "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]", + "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]", + "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]", + "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]", + "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]", + "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]", + "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]", + "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]", + "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]", + "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]", + "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]", + "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]", + "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]", + "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]", + "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]", + "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]", + "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]", + "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]", + "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]", + "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]", + "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]", + "v[250:253]", "v[251:254]", "v[252:255]" +}; + +static const char *const VGPR256RegNames[] = { + "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]", + "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]", + "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]", + "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]", + "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]", + "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]", + "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]", + "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]", + "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]", + "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]", + "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]", + "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]", + "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]", + "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]", + "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]", + "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]", + "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]", + "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]", + "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]", + "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]", + "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]", + "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]", + "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]", + "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]", + "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]", + "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]", + "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]", + "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]", + "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]", + "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]", + "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]", + "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]", + "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]", + "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]", + "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]", + "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]", + "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]", + "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]", + "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]", + "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]", + "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]", + "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]", + "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]", + "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]", + "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]", + "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]", + "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]", + "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]", + "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]", + "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]" +}; + +static const char *const VGPR512RegNames[] = { + "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]", + "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]", + "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]", + "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]", + "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]", + "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]", + "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]", + "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]", + "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]", + "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]", + "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]", + "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]", + "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]", + "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]", + "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]", + "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]", + "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]", + "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]", + "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]", + "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]", + "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]", + "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]", + "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]", + "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]", + "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]", + "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]", + "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]", + "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]", + "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]", + "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]", + "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]", + "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]", + "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]", + "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]", + "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]", + "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]", + "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]", + "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]", + "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]", + "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]", + "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]", + "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]", + "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]", + "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]", + "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]", + "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]", + "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]", + "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]", + "v[240:255]" +}; + +static const char *const SGPR64RegNames[] = { + "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]", + "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]", + "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]", + "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]", + "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]", + "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", + "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", + "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", + "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]" +}; + +static const char *const SGPR128RegNames[] = { + "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]", + "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]", + "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]", + "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]", + "s[96:99]", "s[100:103]" +}; + +static const char *const SGPR256RegNames[] = { + "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]", + "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]", + "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]", + "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]", + "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]" +}; + +static const char *const SGPR512RegNames[] = { + "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]", + "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]", + "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]", + "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" +}; + +#endif diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index 7c198a1b8a3f..201fdc1974c6 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -36,7 +36,6 @@ protected: #define GET_TARGET_REGBANK_CLASS #include "AMDGPUGenRegisterBank.inc" - }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const SIRegisterInfo *TRI; diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index b2867fcc49f9..ff58aa5741a1 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -40,7 +40,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { #define GET_REGINFO_TARGET_DESC #include "AMDGPUGenRegisterInfo.inc" - // Forced to be here by one .inc const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( const MachineFunction *MF) const { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index ed9cbb994fad..5f4f20316a6b 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -16,12 +16,12 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H #include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "R600ISelLowering.h" #include "R600FrameLowering.h" -#include "SIInstrInfo.h" -#include "SIISelLowering.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" #include "SIFrameLowering.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Triple.h" @@ -57,9 +57,12 @@ public: enum { ISAVersion0_0_0, + ISAVersion6_0_0, + ISAVersion6_0_1, ISAVersion7_0_0, ISAVersion7_0_1, ISAVersion7_0_2, + ISAVersion7_0_3, ISAVersion8_0_0, ISAVersion8_0_1, ISAVersion8_0_2, @@ -67,7 +70,9 @@ public: ISAVersion8_0_4, ISAVersion8_1_0, ISAVersion9_0_0, - ISAVersion9_0_1 + ISAVersion9_0_1, + ISAVersion9_0_2, + ISAVersion9_0_3 }; enum TrapHandlerAbi { @@ -787,7 +792,7 @@ public: /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());; + return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 404598ff4738..b644eba536fa 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -28,26 +28,26 @@ #include "GCNSchedStrategy.h" #include "R600MachineScheduler.h" #include "SIMachineScheduler.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/AlwaysInliner.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/Transforms/Vectorize.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Vectorize.h" #include using namespace llvm; @@ -734,7 +734,6 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); addPass(&SILoadStoreOptimizerID); - addPass(createSIShrinkInstructionsPass()); if (EnableSDWAPeephole) { addPass(&SIPeepholeSDWAID); addPass(&MachineLICMID); @@ -742,6 +741,7 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); } + addPass(createSIShrinkInstructionsPass()); } bool GCNPassConfig::addILPOpts() { diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index c96761c0b04e..6c1885e67fcb 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" -#include "Utils/AMDGPUBaseInfo.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index beafebc1284a..dee3d2856701 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index cc68c971b249..16e3b7b4ebee 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -11,18 +11,19 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" -#include "Utils/AMDGPUAsmUtils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -40,12 +41,11 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -814,14 +814,8 @@ private: bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); - bool ParseSectionDirectiveHSAText(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); - bool ParseDirectiveAMDGPUHsaModuleGlobal(); - bool ParseDirectiveAMDGPUHsaProgramGlobal(); - bool ParseSectionDirectiveHSADataGlobalAgent(); - bool ParseSectionDirectiveHSADataGlobalProgram(); - bool ParseSectionDirectiveHSARodataReadonlyAgent(); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); @@ -2365,12 +2359,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } -bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSATextSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { if (getLexer().isNot(AsmToken::Identifier)) return TokError("expected symbol name"); @@ -2384,46 +2372,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { return false; } -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalAgentSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalProgramSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSARodataReadonlyAgentSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2439,27 +2387,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".hsatext") - return ParseSectionDirectiveHSAText(); - if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); - if (IDVal == ".amdgpu_hsa_module_global") - return ParseDirectiveAMDGPUHsaModuleGlobal(); - - if (IDVal == ".amdgpu_hsa_program_global") - return ParseDirectiveAMDGPUHsaProgramGlobal(); - - if (IDVal == ".hsadata_global_agent") - return ParseSectionDirectiveHSADataGlobalAgent(); - - if (IDVal == ".hsadata_global_program") - return ParseSectionDirectiveHSADataGlobalProgram(); - - if (IDVal == ".hsarodata_readonly_agent") - return ParseSectionDirectiveHSARodataReadonlyAgent(); - return true; } @@ -2919,6 +2849,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().isNot(AsmToken::Integer)) return true; + SMLoc ValLoc = Parser.getTok().getLoc(); if (getParser().parseAbsoluteExpression(CntVal)) return true; @@ -2936,21 +2867,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); } - // To improve diagnostics, do not skip delimiters on errors - if (!Failed) { - if (getLexer().isNot(AsmToken::RParen)) { - return true; - } - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { - const AsmToken NextToken = getLexer().peekTok(); - if (NextToken.is(AsmToken::Identifier)) { - Parser.Lex(); - } + if (Failed) { + Error(ValLoc, "too large value for " + CntName); + return true; + } + + if (getLexer().isNot(AsmToken::RParen)) { + return true; + } + + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); } } - return Failed; + return false; } OperandMatchResultTy diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index cafce0164fa9..e30844f082cd 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp + AMDGPURegAsmNames.inc.cpp AMDGPURegisterInfo.cpp AMDGPUUnifyDivergentExitNodes.cpp GCNHazardRecognizer.cpp diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9b3cde7c4df6..88c92b9582fd 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,21 +20,20 @@ #include "AMDGPUDisassembler.h" #include "AMDGPU.h" #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/TargetRegistry.h" - using namespace llvm; #define DEBUG_TYPE "amdgpu-disassembler" diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 0ff405a71e9b..5fa3cf1a223f 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -20,8 +20,8 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" -#include #include +#include #include namespace llvm { diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 80fc4ac9d2a3..cd9e7fb04f16 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" +#include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 523eea41897e..b84640230eee 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUInstPrinter.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index f3266fe82955..0a9c2b94c1ee 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -8,8 +8,8 @@ /// \file //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 647017d5061d..4e828a791e09 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -13,20 +13,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadataStreamer.h" +#include "AMDGPU.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" -#include "llvm/Support/YAMLTraits.h" - -using namespace llvm::AMDGPU; -using namespace llvm::AMDGPU::CodeObject; - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -37,192 +29,7 @@ static cl::opt VerifyCodeObjectMetadata( "amdgpu-verify-comd", cl::desc("Verify AMDGPU Code Object Metadata")); -namespace yaml { - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, AccessQualifier &EN) { - YIO.enumCase(EN, "Default", AccessQualifier::Default); - YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); - YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); - YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { - YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); - YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); - YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); - YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); - YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); - YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, ValueKind &EN) { - YIO.enumCase(EN, "ByValue", ValueKind::ByValue); - YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); - YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); - YIO.enumCase(EN, "Sampler", ValueKind::Sampler); - YIO.enumCase(EN, "Image", ValueKind::Image); - YIO.enumCase(EN, "Pipe", ValueKind::Pipe); - YIO.enumCase(EN, "Queue", ValueKind::Queue); - YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); - YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); - YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); - YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); - YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); - YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); - YIO.enumCase(EN, "HiddenCompletionAction", - ValueKind::HiddenCompletionAction); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, ValueType &EN) { - YIO.enumCase(EN, "Struct", ValueType::Struct); - YIO.enumCase(EN, "I8", ValueType::I8); - YIO.enumCase(EN, "U8", ValueType::U8); - YIO.enumCase(EN, "I16", ValueType::I16); - YIO.enumCase(EN, "U16", ValueType::U16); - YIO.enumCase(EN, "F16", ValueType::F16); - YIO.enumCase(EN, "I32", ValueType::I32); - YIO.enumCase(EN, "U32", ValueType::U32); - YIO.enumCase(EN, "F32", ValueType::F32); - YIO.enumCase(EN, "I64", ValueType::I64); - YIO.enumCase(EN, "U64", ValueType::U64); - YIO.enumCase(EN, "F64", ValueType::F64); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { - YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, - MD.mReqdWorkGroupSize, std::vector()); - YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, - MD.mWorkGroupSizeHint, std::vector()); - YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, - MD.mVecTypeHint, std::string()); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { - YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); - YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); - YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); - YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); - YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, - uint32_t(0)); - YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, - AccessQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, - AddressSpaceQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); - YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); - YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); - YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); - YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); - YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, - MD.mKernargSegmentSize, uint64_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, - MD.mWorkgroupGroupSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, - MD.mWorkitemPrivateSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, - MD.mWavefrontNumSGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, - MD.mWorkitemNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, - MD.mKernargSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, - MD.mGroupSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, - MD.mPrivateSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, - MD.mWavefrontSize, uint8_t(0)); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { - YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, - MD.mDebuggerABIVersion, std::vector()); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, - MD.mReservedNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, - MD.mReservedFirstVGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, - MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, - MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Metadata &MD) { - YIO.mapRequired(Kernel::Key::Name, MD.mName); - YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); - YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, - std::vector()); - if (!MD.mAttrs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); - if (!MD.mArgs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Args, MD.mArgs); - if (!MD.mCodeProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); - if (!MD.mDebugProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, CodeObject::Metadata &MD) { - YIO.mapRequired(Key::Version, MD.mVersion); - YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); - if (!MD.mKernels.empty() || !YIO.outputting()) - YIO.mapOptional(Key::Kernels, MD.mKernels); - } -}; - -} // end namespace yaml - namespace AMDGPU { - -/* static */ -std::error_code CodeObject::Metadata::fromYamlString( - std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) { - yaml::Input YamlInput(YamlString); - YamlInput >> CodeObjectMetadata; - return YamlInput.error(); -} - -/* static */ -std::error_code CodeObject::Metadata::toYamlString( - CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) { - raw_string_ostream YamlStream(YamlString); - yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); - YamlOutput << CodeObjectMetadata; - return std::error_code(); -} - namespace CodeObject { void MetadataStreamer::dump(StringRef YamlString) const { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h index 8d4c51763f63..c6681431d74d 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -17,9 +17,9 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #include "AMDGPU.h" -#include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" #include "llvm/Support/ErrorOr.h" namespace llvm { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 073d19422e86..6abe7f3d37d5 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8dc863f723e2..2a0032fc9adc 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUTargetStreamer.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -25,7 +26,6 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" namespace llvm { @@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } } -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; -} - -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; -} - bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) @@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL); } -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_LOCAL); -} - -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_GLOBAL); -} - bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 5c588bbded9c..968128e94d0b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -44,10 +44,6 @@ public: virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; - virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0; - - virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitStartOfCodeObjectMetadata(const Module &Mod); virtual void EmitKernelCodeObjectMetadata( @@ -74,10 +70,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; @@ -105,10 +97,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 6015ec190fd4..eab90e1d344c 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "R600Defines.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index 0e4eda982139..f6f2582aa11b 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -80,57 +80,71 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"SI", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx600", SIFullSpeedModel, + [FeatureISAVersion6_0_0]>; + +def : ProcessorModel<"SI", SIFullSpeedModel, + [FeatureISAVersion6_0_0] >; -def : ProcessorModel<"tahiti", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"tahiti", SIFullSpeedModel, + [FeatureISAVersion6_0_0] >; -def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"gfx601", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1] +>; -def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; + +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// +def : ProcessorModel<"gfx700", SIQuarterSpeedModel, + [FeatureISAVersion7_0_0] +>; + def : ProcessorModel<"bonaire", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"kabini", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2] ->; - def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"hawaii", SIFullSpeedModel, - [FeatureISAVersion7_0_1] ->; - -def : ProcessorModel<"mullins", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2]>; - -def : ProcessorModel<"gfx700", SIQuarterSpeedModel, - [FeatureISAVersion7_0_0] ->; - def : ProcessorModel<"gfx701", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; +def : ProcessorModel<"hawaii", SIFullSpeedModel, + [FeatureISAVersion7_0_1] +>; + def : ProcessorModel<"gfx702", SIQuarterSpeedModel, [FeatureISAVersion7_0_2] >; +def : ProcessorModel<"gfx703", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"kabini", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"mullins", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3]>; + //===----------------------------------------------------------------------===// // Volcanic Islands //===----------------------------------------------------------------------===// @@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel, [FeatureISAVersion8_1_0] >; -def : ProcessorModel<"gfx900", SIQuarterSpeedModel, - [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32] +//===----------------------------------------------------------------------===// +// GFX9 +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"gfx900", SIQuarterSpeedModel, + [FeatureISAVersion9_0_0] >; -def : ProcessorModel<"gfx901", SIQuarterSpeedModel, - [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32] +def : ProcessorModel<"gfx901", SIQuarterSpeedModel, + [FeatureISAVersion9_0_1] >; + +def : ProcessorModel<"gfx902", SIQuarterSpeedModel, + [FeatureISAVersion9_0_2] +>; + +def : ProcessorModel<"gfx903", SIQuarterSpeedModel, + [FeatureISAVersion9_0_3] +>; + diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 09b328765604..6993e8a62a9c 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -12,15 +12,14 @@ /// computing their address on the fly ; it also sets STACK_SIZE info. //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +29,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index 03fc1aff5ec1..0d8ccd088ec4 100644 --- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -15,10 +15,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 5c30a0734f0d..66def2d29caf 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -15,11 +15,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp index 1f01ad732e00..37787b3c5f72 100644 --- a/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -10,8 +10,8 @@ #include "R600FrameLowering.h" #include "AMDGPUSubtarget.h" #include "R600RegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/MathExtras.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 60b913cfd39a..c55878f8bff0 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, Mask = DAG.getConstant(0xff, DL, MVT::i32); } else if (Store->getMemoryVT() == MVT::i16) { assert(Store->getAlignment() >= 2); - Mask = DAG.getConstant(0xffff, DL, MVT::i32);; + Mask = DAG.getConstant(0xffff, DL, MVT::i32); } else { llvm_unreachable("Unsupported private trunc store"); } diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 2422d57269eb..c5da5e404200 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "R600InstrInfo.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" -#include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/BitVector.h" @@ -35,8 +35,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include #include -#include #include +#include #include #include #include diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index db18e5bd1afa..47fda1c8fa82 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index 3e957126b497..1cb40938cee7 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" @@ -24,6 +23,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp index 62ebef8e91af..b5c439b21b89 100644 --- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -19,8 +19,8 @@ // //===----------------------------------------------------------------------===// -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 3cca815d8773..5f5f25103c02 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -65,10 +65,10 @@ /// ultimately led to the creation of an illegal COPY. //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index dfac068d1f69..e10f1ed3762e 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -730,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { // Make sure sources are identical. const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() || + if (!Src0->isReg() || !Src1->isReg() || + Src0->getSubReg() != Src1->getSubReg() || Src0->getSubReg() != AMDGPU::NoSubRegister) return nullptr; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 97bb0f0c0656..b1bd14e421f0 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -8,10 +8,10 @@ //==-----------------------------------------------------------------------===// #include "SIFrameLowering.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index b48b23911105..599ee942d738 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17,12 +17,12 @@ #define _USE_MATH_DEFINES #endif +#include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUIntrinsicInfo.h" -#include "AMDGPUTargetMachine.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" #include "SIDefines.h" -#include "SIISelLowering.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" @@ -2604,7 +2604,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16); - return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);; + return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc); } SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c10badba88f3..0f009a48754a 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -229,7 +229,7 @@ public: MachineInstr &MI); BlockWaitcntBrackets() - : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false), + : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false), LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 36d29b8ecf06..58c05cf16f15 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -20,10 +20,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 35d3a93d8710..5f1c7f1fc42f 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -60,8 +60,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 3680e02da576..ba616ada0c9c 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 348bb4fa0260..9fdb8caac6f2 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -15,8 +15,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #include "AMDGPUMachineFunction.h" -#include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index 9d4e677400e6..bb17dbbdfbd6 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "SIMachineScheduler.h" #include "AMDGPU.h" #include "SIInstrInfo.h" -#include "SIMachineScheduler.h" #include "SIRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index fae249b04492..f4ddf1891683 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -20,13 +20,12 @@ /// //===----------------------------------------------------------------------===// - #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include @@ -129,7 +128,8 @@ public: bool getNeg() const { return Neg; } bool getSext() const { return Sext; } - uint64_t getSrcMods() const; + uint64_t getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const; }; class SDWADstOperand : public SDWAOperand { @@ -240,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg, return SuperMask.all(); } -uint64_t SDWASrcOperand::getSrcMods() const { +uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const { uint64_t Mods = 0; + const auto *MI = SrcOp->getParent(); + if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { + Mods = Mod->getImm(); + } + } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { + Mods = Mod->getImm(); + } + } if (Abs || Neg) { assert(!Sext && "Float and integer src modifiers can't be set simulteniously"); Mods |= Abs ? SISrcMods::ABS : 0; - Mods |= Neg ? SISrcMods::NEG : 0; + Mods ^= Neg ? SISrcMods::NEG : 0; } else if (Sext) { Mods |= SISrcMods::SEXT; } @@ -312,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { } copyRegOperand(*Src, *getTargetOperand()); SrcSel->setImm(getSrcSel()); - SrcMods->setImm(getSrcMods()); + SrcMods->setImm(getSrcMods(TII, Src)); getTargetOperand()->setIsKill(false); return true; } @@ -409,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { switch (Opcode) { case AMDGPU::V_LSHRREV_B32_e32: case AMDGPU::V_ASHRREV_I32_e32: - case AMDGPU::V_LSHLREV_B32_e32: { + case AMDGPU::V_LSHLREV_B32_e32: + case AMDGPU::V_LSHRREV_B32_e64: + case AMDGPU::V_ASHRREV_I32_e64: + case AMDGPU::V_LSHLREV_B32_e64: { // from: v_lshrrev_b32_e32 v1, 16/24, v0 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 @@ -432,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B32_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || + Opcode == AMDGPU::V_LSHLREV_B32_e64) { auto SDWADst = make_unique( Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -441,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique( Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, - Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B32_e32 && + Opcode != AMDGPU::V_LSHRREV_B32_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -451,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { case AMDGPU::V_LSHRREV_B16_e32: case AMDGPU::V_ASHRREV_I16_e32: - case AMDGPU::V_LSHLREV_B16_e32: { + case AMDGPU::V_LSHLREV_B16_e32: + case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_LSHLREV_B16_e64: { // from: v_lshrrev_b16_e32 v1, 8, v0 // to SDWA src:v0 src_sel:BYTE_1 @@ -472,7 +491,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B16_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || + Opcode == AMDGPU::V_LSHLREV_B16_e64) { auto SDWADst = make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -481,7 +501,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique( Src1, Dst, BYTE_1, false, false, - Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B16_e32 && + Opcode != AMDGPU::V_LSHRREV_B16_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -549,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { ++NumSDWAPatternsFound; break; } - case AMDGPU::V_AND_B32_e32: { + case AMDGPU::V_AND_B32_e32: + case AMDGPU::V_AND_B32_e64: { // e.g.: // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 // to SDWA src:v0 src_sel:WORD_0/BYTE_0 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); - auto Imm = foldToImm(*Src0); - if (!Imm) - break; - - if (*Imm != 0x0000ffff && *Imm != 0x000000ff) - break; - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + auto ValSrc = Src1; + auto Imm = foldToImm(*Src0); + + if (!Imm) { + Imm = foldToImm(*Src1); + ValSrc = Src0; + } + + if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) + break; + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (TRI->isPhysicalRegister(Src1->getReg()) || @@ -570,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { break; auto SDWASrc = make_unique( - Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); + ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -583,28 +609,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const { // Check if this instruction has opcode that supports SDWA - return AMDGPU::getSDWAOp(MI.getOpcode()) != -1; + unsigned Opc = MI.getOpcode(); + if (AMDGPU::getSDWAOp(Opc) != -1) + return true; + int Opc32 = AMDGPU::getVOPe32(Opc); + if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) + return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && + !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + return false; } bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands) { // Convert to sdwa int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); + if (SDWAOpcode == -1) + SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); assert(SDWAOpcode != -1); + // Copy dst, if it is present in original then should also be present in SDWA + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + if (!Dst && !TII->isVOPC(MI)) + return false; + const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); // Create SDWA version of instruction MI and initialize its operands MachineInstrBuilder SDWAInst = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); - // Copy dst, if it is present in original then should also be present in SDWA - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); - } else { - assert(TII->isVOPC(MI)); } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and @@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, Src0 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src0); // Copy src1 if present, initialize src1_modifiers. @@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, assert( AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src1); - } else { - assert(TII->isVOP1(MI)); } if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || @@ -746,8 +786,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { PotentialMatches.clear(); SDWAOperands.clear(); + bool Ret = !ConvertedInstructions.empty(); while (!ConvertedInstructions.empty()) legalizeScalarOperands(*ConvertedInstructions.pop_back_val()); - return false; + return Ret; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 6fb01a09fe13..b611f28fcabd 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "SIRegisterInfo.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -1104,6 +1104,66 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } } +StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { + #define AMDGPU_REG_ASM_NAMES + #include "AMDGPURegAsmNames.inc.cpp" + + #define REG_RANGE(BeginReg, EndReg, RegTable) \ + if (Reg >= BeginReg && Reg <= EndReg) { \ + unsigned Index = Reg - BeginReg; \ + assert(Index < array_lengthof(RegTable)); \ + return RegTable[Index]; \ + } + + REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); + REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, + VGPR96RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3, + AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255, + VGPR128RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, + AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103, + SGPR128RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7, + AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR256RegNames); + + REG_RANGE( + AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15, + AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR512RegNames); + + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7, + AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR256RegNames); + + REG_RANGE( + AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15, + AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR512RegNames + ); + +#undef REG_RANGE + + // FIXME: Rename flat_scr so we don't need to special case this. + switch (Reg) { + case AMDGPU::FLAT_SCR: + return "flat_scratch"; + case AMDGPU::FLAT_SCR_LO: + return "flat_scratch_lo"; + case AMDGPU::FLAT_SCR_HI: + return "flat_scratch_hi"; + default: + // For the special named registers the default is fine. + return TargetRegisterInfo::getRegAsmName(Reg); + } +} + // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index a648c178101a..8fed6d5f9710 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -16,8 +16,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -118,6 +118,8 @@ public: bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const; + StringRef getRegAsmName(unsigned Reg) const override; + unsigned getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 630f469eabf0..f581e69980c7 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUBaseInfo.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include @@ -38,7 +38,6 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" - #define GET_INSTRINFO_NAMED_OPS #include "AMDGPUGenInstrInfo.inc" #undef GET_INSTRINFO_NAMED_OPS @@ -104,6 +103,11 @@ namespace AMDGPU { namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { + // SI. + if (Features.test(FeatureISAVersion6_0_0)) + return {6, 0, 0}; + if (Features.test(FeatureISAVersion6_0_1)) + return {6, 0, 1}; // CI. if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; @@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {7, 0, 1}; if (Features.test(FeatureISAVersion7_0_2)) return {7, 0, 2}; + if (Features.test(FeatureISAVersion7_0_3)) + return {7, 0, 3}; // VI. if (Features.test(FeatureISAVersion8_0_0)) @@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {9, 0, 0}; if (Features.test(FeatureISAVersion9_0_1)) return {9, 0, 1}; + if (Features.test(FeatureISAVersion9_0_2)) + return {9, 0, 2}; + if (Features.test(FeatureISAVersion9_0_3)) + return {9, 0, 3}; if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) return {0, 0, 0}; @@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } -MCSection *getHSATextSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_EXECINSTR | - ELF::SHF_AMDGPU_HSA_AGENT | - ELF::SHF_AMDGPU_HSA_CODE); -} - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL | - ELF::SHF_AMDGPU_HSA_AGENT); -} - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL); -} - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | - ELF::SHF_AMDGPU_HSA_AGENT); -} - bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 19888ad7556a..eff0230d21f5 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); -MCSection *getHSATextSection(MCContext &Ctx); - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx); - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS); diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 77fc9551cff9..a8ca593f14ed 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGPUbfe_u32>; def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; -def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile>; -def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile>; +def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, int_amdgcn_alignbit>; +def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile, AMDGPUfmin3>; def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile, AMDGPUsmin3>; def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile, AMDGPUumin3>; @@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; + +let Constraints = "@earlyclobber $vdst" in { def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile, int_amdgcn_mqsad_pk_u16_u8>; +} // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { let SchedRW = [WriteDouble]; @@ -232,8 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; let SubtargetPredicate = isCIVI in { +let Constraints = "@earlyclobber $vdst" in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile, int_amdgcn_qsad_pk_u16_u8>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile, int_amdgcn_mqsad_u32_u8>; +} // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 14e197f477f1..f9da036c7e46 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,6 +23,8 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -43,9 +45,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index f8b65573f9cd..8715657ad5e2 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -21,9 +21,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b18ed509ed23..b4fb292c0116 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMBaseRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index a33d025d114e..a7ac9a1dca6e 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -122,8 +123,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), MRI.createGenericVirtualRegister(LLT::scalar(32))}; - MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0); - MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32); + MIRBuilder.buildUnmerge(NewRegs, Arg.Reg); bool IsLittle = MIRBuilder.getMF().getSubtarget().isLittle(); if (!IsLittle) @@ -339,7 +339,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { if (!IsLittle) std::swap(NewRegs[0], NewRegs[1]); - MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32}); + MIRBuilder.buildMerge(Arg.Reg, NewRegs); return 1; } @@ -461,7 +461,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI(); const auto &DL = MF.getDataLayout(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const auto &STI = MF.getSubtarget(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); if (MF.getSubtarget().genLongCalls()) @@ -473,6 +474,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // registers, but don't insert it yet. auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( TRI->getCallPreservedMask(MF, CallConv)); + if (Callee.isReg()) { + auto CalleeReg = Callee.getReg(); + if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), + *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0)); + } SmallVector ArgInfos; for (auto Arg : OrigArgs) { diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 6434df317aa8..667337dc9267 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -21,10 +21,10 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4f6a73b5980d..384f80356cc8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -26,8 +26,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index c2b2502843c0..16b54e8848c2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -20,9 +20,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 949d821e36b2..5b2d093e8f0d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "ARMISelLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" -#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" @@ -29,13 +29,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/VectorUtils.h" @@ -61,7 +61,6 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -103,8 +102,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0f225156d4ca..817b567db767 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1958,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1966,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1976,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0, IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index b1f059835ff5..2ae3bad4076b 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -127,34 +127,30 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } -static bool selectSequence(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP"); +static bool selectMergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP"); - // We only support G_SEQUENCE as a way to stick together two scalar GPRs + // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs // into one DPR. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 64 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_SEQUENCE"); + "Unsupported operand for G_MERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - unsigned VReg2 = MIB->getOperand(3).getReg(); + "Unsupported operand for G_MERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 32 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - - // Remove the operands corresponding to the offsets. - MIB->RemoveOperand(4); - MIB->RemoveOperand(2); + "Unsupported operand for G_MERGE_VALUES"); MIB->setDesc(TII.get(ARM::VMOVDRR)); MIB.add(predOps(ARMCC::AL)); @@ -162,30 +158,32 @@ static bool selectSequence(MachineInstrBuilder &MIB, return true; } -static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP"); +static bool selectUnmergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP"); - // We only support G_EXTRACT as a way to break up one DPR into two GPRs. + // We only support G_UNMERGE_VALUES as a way to break up one DPR into two + // GPRs. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 32 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_EXTRACT"); + "Unsupported operand for G_UNMERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; - assert(MRI.getType(VReg1).getSizeInBits() == 64 && - RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_EXTRACT"); - assert(MIB->getOperand(2).getImm() % 32 == 0 && - "Unsupported operand for G_EXTRACT"); + assert(MRI.getType(VReg1).getSizeInBits() == 32 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); + (void)VReg2; + assert(MRI.getType(VReg2).getSizeInBits() == 64 && + RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); - // Remove the operands corresponding to the offsets. - MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32); - - MIB->setDesc(TII.get(ARM::VGETLNi32)); + MIB->setDesc(TII.get(ARM::VMOVRRD)); MIB.add(predOps(ARMCC::AL)); return true; @@ -407,13 +405,13 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { MIB.addImm(0).add(predOps(ARMCC::AL)); break; } - case G_SEQUENCE: { - if (!selectSequence(MIB, TII, MRI, TRI, RBI)) + case G_MERGE_VALUES: { + if (!selectMergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } - case G_EXTRACT: { - if (!selectExtract(MIB, TII, MRI, TRI, RBI)) + case G_UNMERGE_VALUES: { + if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 5bf6c7aed6b8..2d490b7c303e 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -45,7 +45,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, 1, p0}, Legal); } - for (unsigned Op : {G_ADD, G_SUB, G_MUL}) { + for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { for (auto Ty : {s1, s8, s16}) setAction({Op, Ty}, WidenScalar); setAction({Op, s32}, Legal); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 72fcf7cd6a4f..7a452d4a2095 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -33,7 +34,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 9e9c1ba6c114..13acea3c28a9 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -25,9 +25,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index a20997c95cd9..f59b075e6dd9 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -221,6 +221,9 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_ADD: case G_SUB: case G_MUL: + case G_AND: + case G_OR: + case G_XOR: case G_SDIV: case G_UDIV: case G_SEXT: @@ -252,30 +255,32 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; - case G_SEQUENCE: { - // We only support G_SEQUENCE for creating a double precision floating point - // value out of two GPRs. + case G_MERGE_VALUES: { + // We only support G_MERGE_VALUES for creating a double precision floating + // point value out of two GPRs. LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); - LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || Ty2.getSizeInBits() != 32) return getInvalidInstructionMapping(); OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); break; } - case G_EXTRACT: { - // We only support G_EXTRACT for splitting a double precision floating point - // value into two GPRs. + case G_UNMERGE_VALUES: { + // We only support G_UNMERGE_VALUES for splitting a double precision + // floating point value into two GPRs. LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); - if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 || - MI.getOperand(2).getImm() % 32 != 0) + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 || + Ty2.getSizeInBits() != 64) return getInvalidInstructionMapping(); - OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], - &ARM::ValueMappings[ARM::DPR3OpsIdx], - nullptr, nullptr}); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}); break; } default: diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b8a708a20a95..d9d0c27c6304 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -28,10 +28,10 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Target/TargetOptions.h" #include #include diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index f5e4043882ff..c0506cfda612 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" @@ -389,6 +390,20 @@ public: return getTM(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + // add DAG Mutations here. + return DAG; + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMI *DAG = createGenericSchedPostRA(C); + // add DAG Mutations here. + return DAG; + } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index edbf2b99126c..a5b27abeb27f 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" -#include "ARMTargetObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" #include diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ada816c16389..19fba3033bb2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -39,10 +41,8 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetParser.h" @@ -1026,6 +1026,15 @@ public: ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { + // If we have an immediate that's not a constant, treat it as an expression + // needing a fixup. + if (isImm() && !isa(getImm())) { + // We want to avoid matching :upper16: and :lower16: as we want these + // expressions to match in isImm0_65535Expr() + const ARMMCExpr *ARM16Expr = dyn_cast(getImm()); + return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)); + } if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); if (!CE) return false; @@ -8404,7 +8413,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // wide encoding wasn't explicit. if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || !isARMLowRegister(Inst.getOperand(0).getReg()) || - (unsigned)Inst.getOperand(2).getImm() > 255 || + (Inst.getOperand(2).isImm() && + (unsigned)Inst.getOperand(2).getImm() > 255) || ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || (inITBlock() && Inst.getOperand(5).getReg() != 0)) || (static_cast(*Operands[3]).isToken() && @@ -8556,7 +8566,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && - (unsigned)Inst.getOperand(1).getImm() <= 255 && + (Inst.getOperand(1).isImm() && + (unsigned)Inst.getOperand(1).getImm() <= 255) && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR) || (inITBlock() && Inst.getOperand(4).getReg() == 0)) && diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e812d32cc76f..585726208a8d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index b0d1d3fb9ef0..716492ea2566 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackendDarwin.h" #include "MCTargetDesc/ARMAsmBackendELF.h" #include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -31,10 +33,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 0, 20, 0}, {"fixup_t2_movw_lo16", 0, 20, 0}, {"fixup_arm_mod_imm", 0, 12, 0}, + {"fixup_t2_so_imm", 0, 26, 0}, }; const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 12, 20, 0}, {"fixup_t2_movw_lo16", 12, 20, 0}, {"fixup_arm_mod_imm", 20, 12, 0}, + {"fixup_t2_so_imm", 26, 6, 0}, }; if (Kind < FirstTargetFixupKind) @@ -693,6 +695,23 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return 0; } return Value; + case ARM::fixup_t2_so_imm: { + Value = ARM_AM::getT2SOImmVal(Value); + if ((int64_t)Value < 0) { + Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value"); + return 0; + } + // Value will contain a 12-bit value broken up into a 4-bit shift in bits + // 11:8 and the 8-bit immediate in 0:7. The instruction has the immediate + // in 0:7. The 4-bit shift is split up into i:imm3 where i is placed at bit + // 10 of the upper half-word and imm3 is placed at 14:12 of the lower + // half-word. + uint64_t EncValue = 0; + EncValue |= (Value & 0x800) << 15; + EncValue |= (Value & 0x700) << 4; + EncValue |= (Value & 0xff); + return swapHalfWords(EncValue, IsLittleEndian); + } } } @@ -704,16 +723,17 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; + const unsigned FixupKind = Fixup.getKind() ; // MachO (the only user of "Value") tries to make .o files that look vaguely // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit // into the addend if possible. Other relocation types don't want this bit // though (branches couldn't encode it if it *was* present, and no other // relocations exist) and it can interfere with checking valid expressions. - if ((unsigned)Fixup.getKind() == FK_Data_4 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) { + if (FixupKind == FK_Data_4 || + FixupKind == ARM::fixup_arm_movw_lo16 || + FixupKind == ARM::fixup_arm_movt_hi16 || + FixupKind == ARM::fixup_t2_movw_lo16 || + FixupKind == ARM::fixup_t2_movt_hi16) { if (Sym) { if (Asm.isThumbFunc(Sym)) Value |= 1; @@ -729,23 +749,27 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // linker can handle it. GNU AS produces an error in this case. if (Sym->isExternal() || Value >= 0x400004) IsResolved = false; - // When an ARM function is called from a Thumb function, produce a - // relocation so the linker will use the correct branch instruction for ELF - // binaries. - if (Sym->isELF()) { - unsigned Type = dyn_cast(Sym)->getType(); - if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) && - !Asm.isThumbFunc(Sym)) + } + // Create relocations for unconditional branches to function symbols with + // different execution mode in ELF binaries. + if (Sym && Sym->isELF()) { + unsigned Type = dyn_cast(Sym)->getType(); + if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { + if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) + IsResolved = false; + if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || + FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_uncondbranch)) IsResolved = false; } } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination // symbol's thumb-ness to get interworking right. - if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || - (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) + if (A && (FixupKind == ARM::fixup_arm_thumb_blx || + FixupKind == ARM::fixup_arm_blx || + FixupKind == ARM::fixup_arm_uncondbl || + FixupKind == ARM::fixup_arm_condbl)) IsResolved = false; } @@ -792,6 +816,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_so_imm: return 4; case FK_SecRel_2: @@ -844,6 +869,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: case ARM::fixup_arm_mod_imm: + case ARM::fixup_t2_so_imm: // Instruction size is 4 bytes. return 4; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 09dc0173ade6..bd729fabedf5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H #include "ARMAsmBackend.h" -#include "llvm/Support/MachO.h" +#include "llvm/BinaryFormat/MachO.h" namespace llvm { class ARMAsmBackendDarwin : public ARMAsmBackend { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index e1fa24571820..59f31be69d58 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -9,12 +9,12 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 4d6c52f3cd49..93f4006cee87 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -43,12 +44,11 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 3fe2302bdd37..9f6c5d7bf920 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -110,6 +110,9 @@ enum Fixups { // fixup_arm_mod_imm - Fixup for mod_imm fixup_arm_mod_imm, + // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand + fixup_t2_so_imm, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index d9df2c6da7ec..f1f35f409900 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -339,7 +339,17 @@ public: unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - unsigned SoImm = MI.getOperand(Op).getImm(); + const MCOperand &MO = MI.getOperand(Op); + + // Support for fixups (MCFixup) + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // Fixups resolve to plain values that need to be encoded. + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + return 0; + } + unsigned SoImm = MO.getImm(); unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm); assert(Encoded != ~0U && "Not a Thumb2 so_imm value?"); return Encoded; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 477755157040..b8a8b1f7619a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMMCAsmInfo.h" -#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCELFStreamer.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 34c770440e1b..5516a1bdb03d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -9,10 +9,10 @@ #include "ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm-c/Disassembler.h" using namespace llvm; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b77181f29b2d..4a8139dea668 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 7ae2f864d79d..00505a103e00 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index f10427e2ed57..0b6574c37de1 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -11,26 +11,26 @@ // //===----------------------------------------------------------------------===// +#include "Thumb1FrameLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "ThumbRegisterInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCDwarf.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 0ebf55924647..3a3920a2db32 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2e2dfe035e26..9125be96a07b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,26 @@ // //===----------------------------------------------------------------------===// -#include "Thumb2InstrInfo.h" -#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include using namespace llvm; @@ -30,7 +40,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI() {} + : ARMBaseInstrInfo(STI) {} /// Return the noop instruction to use for a noop. void Thumb2InstrInfo::getNoop(MCInst &NopInst) const { @@ -539,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Add cc_out operand if the original instruction did not have one. if (!HasCCOut) MI.addOperand(MachineOperand::CreateReg(0, false)); - } else { - // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) return false; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index c90475c28db7..d911dd97b1ac 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -14,10 +14,10 @@ #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h index 8e5cc5360ad4..5eadf7bdcef6 100644 --- a/lib/Target/AVR/AVR.h +++ b/lib/Target/AVR/AVR.h @@ -15,8 +15,8 @@ #ifndef LLVM_AVR_H #define LLVM_AVR_H -#include "llvm/Target/TargetMachine.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index d6491ce5c3bf..f0c7b11895b4 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -18,8 +18,8 @@ #include "InstPrinter/AVRInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 11a47bad78ba..55f3f5cf428a 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -51,7 +51,6 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF, BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const AVRTargetMachine &TM = static_cast(MF.getTarget()); // Reserve the intermediate result registers r1 and r2 // The result of instructions like 'mul' is always stored here. @@ -269,4 +268,3 @@ void AVRRegisterInfo::splitReg(unsigned Reg, } } // end of namespace llvm - diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp index c228d051d771..556d69ec5234 100644 --- a/lib/Target/AVR/AVRSubtarget.cpp +++ b/lib/Target/AVR/AVRSubtarget.cpp @@ -13,7 +13,7 @@ #include "AVRSubtarget.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "AVR.h" diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h index a37849c3f3f7..b0e634f86168 100644 --- a/lib/Target/AVR/AVRSubtarget.h +++ b/lib/Target/AVR/AVRSubtarget.h @@ -14,10 +14,9 @@ #ifndef LLVM_AVR_SUBTARGET_H #define LLVM_AVR_SUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "AVRFrameLowering.h" #include "AVRISelLowering.h" diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp index 2ab0b1080c6a..91d2a8737b87 100644 --- a/lib/Target/AVR/AVRTargetMachine.cpp +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -15,12 +15,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" -#include "AVRTargetObjectFile.h" #include "AVR.h" +#include "AVRTargetObjectFile.h" #include "MCTargetDesc/AVRMCTargetDesc.h" namespace llvm { diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp index af14d9292f27..0cebb0f043f9 100644 --- a/lib/Target/AVR/AVRTargetObjectFile.cpp +++ b/lib/Target/AVR/AVRTargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AVRTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "AVR.h" diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 5b0398c0ca34..cf52e552978f 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -18,12 +18,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" diff --git a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp index d2a21fb64635..e69accfa9393 100644 --- a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp +++ b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp @@ -16,11 +16,11 @@ #include "AVRSubtarget.h" #include "MCTargetDesc/AVRMCTargetDesc.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp index 713754821005..1e61eccf775f 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp @@ -1,8 +1,8 @@ #include "AVRELFStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "AVRMCTargetDesc.h" diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp index 400296b8409b..085afd23a83c 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp @@ -9,11 +9,11 @@ #include "AVRMCExpr.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/MC/MCAsmLayout.h" namespace llvm { diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index a4fa5c0a9310..826430e94b9c 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AVRMCTargetDesc.h" #include "AVRELFStreamer.h" #include "AVRMCAsmInfo.h" -#include "AVRMCTargetDesc.h" #include "AVRTargetStreamer.h" #include "InstPrinter/AVRInstPrinter.h" diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index c5201465e074..fcd903b7a4a8 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -18,10 +18,10 @@ #include "BPFTargetMachine.h" #include "InstPrinter/BPFInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp index e38facead922..5351cfa95020 100644 --- a/lib/Target/BPF/BPFInstrInfo.cpp +++ b/lib/Target/BPF/BPFInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstrInfo.h" +#include "BPF.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 7925bee9c587..273843e92701 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFRegisterInfo.h" +#include "BPF.h" #include "BPFSubtarget.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/IR/DiagnosticInfo.h" #define GET_REGINFO_TARGET_DESC #include "BPFGenRegisterInfo.inc" diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index cf8e73540904..d84b0a80fc0c 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFTargetMachine.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/LegacyPassManager.h" +#include "BPF.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index b98621ca4749..a1d732c339e5 100644 --- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -15,10 +15,10 @@ #include "BPFSubtarget.h" #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index ffd29f3ea991..64e986fe0f04 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstPrinter.h" +#include "BPF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index ebe9abd8ffac..d5e1d7706edc 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index b58409730de0..797904e1c976 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/BPFMCTargetDesc.h" #include "BPF.h" #include "InstPrinter/BPFInstPrinter.h" -#include "MCTargetDesc/BPFMCTargetDesc.h" #include "MCTargetDesc/BPFMCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 3df673eaeb4b..d1c97c9987e1 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H #define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H -#include "llvm/Support/DataTypes.h" #include "llvm/Config/config.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class MCAsmBackend; diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index b0b2644fffbe..c19e636d79ca 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -17,11 +17,12 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonShuffler.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -42,13 +43,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index 07767d1037a9..5b02aa3ca3ae 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -65,9 +65,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include #include #include +#include using namespace llvm; diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 87c212b6163f..586220dfec26 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -12,12 +12,12 @@ #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -25,8 +25,8 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index bb5128e7500f..e689483a0999 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonAsmPrinter.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" @@ -23,6 +23,7 @@ #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 8502bf24c02f..14c682c6df4b 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -13,8 +13,8 @@ #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index af0f8b265bda..730026121d3b 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonBitTracker.h" +#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h index 717480314d16..769ec7044a0e 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.h +++ b/lib/Target/Hexagon/HexagonBlockRanges.h @@ -14,8 +14,8 @@ #include #include #include -#include #include +#include namespace llvm { diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp index a07ba77e6f3e..b5b46f2b7d19 100644 --- a/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -175,7 +175,8 @@ namespace { None = 0, Root = 0x01, Internal = 0x02, - Used = 0x04 + Used = 0x04, + InBounds = 0x08 }; uint32_t Flags; @@ -231,6 +232,11 @@ namespace { OS << ','; OS << "used"; } + if (GN.Flags & GepNode::InBounds) { + if (Comma) + OS << ','; + OS << "inbounds"; + } OS << "} "; if (GN.Flags & GepNode::Root) OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; @@ -334,10 +340,11 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); GepNode *N = new (*Mem) GepNode; Value *PtrOp = GepI->getPointerOperand(); + uint32_t InBounds = GepI->isInBounds() ? GepNode::InBounds : 0; ValueToNodeMap::iterator F = NM.find(PtrOp); if (F == NM.end()) { N->BaseVal = PtrOp; - N->Flags |= GepNode::Root; + N->Flags |= GepNode::Root | InBounds; } else { // If PtrOp was a GEP instruction, it must have already been processed. // The ValueToNodeMap entry for it is the last gep node in the generated @@ -373,7 +380,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, Value *Op = *OI; GepNode *Nx = new (*Mem) GepNode; Nx->Parent = PN; // Link Nx to the previous node. - Nx->Flags |= GepNode::Internal; + Nx->Flags |= GepNode::Internal | InBounds; Nx->PTy = PtrTy; Nx->Idx = Op; Nodes.push_back(Nx); @@ -1081,7 +1088,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, GepNode *RN = NA[0]; assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); - Value *NewInst = nullptr; + GetElementPtrInst *NewInst = nullptr; Value *Input = RN->BaseVal; Value **IdxList = new Value*[Num+1]; unsigned nax = 0; @@ -1112,6 +1119,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, Type *InpTy = Input->getType(); Type *ElTy = cast(InpTy->getScalarType())->getElementType(); NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); + NewInst->setIsInBounds(RN->Flags & GepNode::InBounds); DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); Input = NewInst; } while (nax <= Num); diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index 783b916e04b0..aa68f6cfdfc1 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2276,7 +2276,7 @@ Undetermined: goto Undetermined; uint32_t Props = PredC.properties(); - bool CTrue = false, CFalse = false;; + bool CTrue = false, CFalse = false; if (Props & ConstantProperties::Zero) CFalse = true; else if (Props & ConstantProperties::NonZero) diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 8118c8eb149d..6b4f53428256 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" -#include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/PassSupport.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 67af947e089d..03c4a83594b3 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -65,9 +65,9 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 015d3b840e6f..23d4e2610d9a 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -12,10 +12,9 @@ // form. //===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" #include "Hexagon.h" #include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 25018b9ed510..18e49c69b8e3 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -10,8 +10,8 @@ #define DEBUG_TYPE "hexagon-pei" -#include "HexagonBlockRanges.h" #include "HexagonFrameLowering.h" +#include "HexagonBlockRanges.h" #include "HexagonInstrInfo.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index c99ad5130aef..7c6de6d513e8 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -14,10 +14,10 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 54d99d399f88..bf31e1699284 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -34,8 +34,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include #include diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index 85222944c77c..3c37d9ebb0eb 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -40,8 +40,8 @@ #include "llvm/Pass.h" #include "llvm/Support/MathExtras.h" #include -#include #include +#include #include using namespace llvm; @@ -235,8 +235,11 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned DR = MI->getOperand(0).getReg(); if (isRegPair(DR)) continue; + MachineOperand &PredOp = MI->getOperand(1); + if (PredOp.isUndef()) + continue; - unsigned PR = MI->getOperand(1).getReg(); + unsigned PR = PredOp.getReg(); unsigned Idx = I2X.lookup(MI); CondsetMap::iterator F = CM.find(DR); bool IfTrue = HII->isPredicatedTrue(Opc); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 4c6c6eeafbe0..afed894cfb9a 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonISelLowering.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" @@ -26,8 +26,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index b76da727237c..f43101fa456d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "HexagonInstrInfo.h" #include "Hexagon.h" #include "HexagonHazardRecognizer.h" -#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/SmallPtrSet.h" @@ -57,9 +57,9 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRMAP_INFO -#include "HexagonGenInstrInfo.inc" -#include "HexagonGenDFAPacketizer.inc" #include "HexagonDepTimingClasses.h" +#include "HexagonGenDFAPacketizer.inc" +#include "HexagonGenInstrInfo.inc" cl::opt ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 9aa185fc85a6..b748b58bc0ae 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -23,11 +23,11 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include #include diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 324108284a9a..4602de979024 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -563,40 +563,33 @@ void ConvergingVLIWScheduler::readyQueueVerboseDump( } #endif -/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = nullptr; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit &Pred = *I->getSUnit(); - if (!Pred.isScheduled) { - // We found an available, but not scheduled, predecessor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return nullptr; - OnlyAvailablePred = &Pred; - } +/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { + if (SU->NumPredsLeft == 0) + return false; + + for (auto &Pred : SU->Preds) { + // We found an available, but not scheduled, predecessor. + if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) + return false; } - return OnlyAvailablePred; + + return true; } -/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledSucc(SUnit *SU) { - SUnit *OnlyAvailableSucc = nullptr; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - SUnit &Succ = *I->getSUnit(); - if (!Succ.isScheduled) { - // We found an available, but not scheduled, successor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) - return nullptr; - OnlyAvailableSucc = &Succ; - } +/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { + if (SU->NumSuccsLeft == 0) + return false; + + for (auto &Succ : SU->Succs) { + // We found an available, but not scheduled, successor. + if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) + return false; } - return OnlyAvailableSucc; + return true; } // Constants used to denote relative importance of @@ -673,12 +666,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Count the number of nodes that // this node is the sole unscheduled node for. for (const SDep &SI : SU->Succs) - if (getSingleUnscheduledPred(SI.getSUnit()) == SU) + if (isSingleUnscheduledPred(SI.getSUnit(), SU)) ++NumNodesBlocking; } else { // How many unscheduled predecessors block this node? for (const SDep &PI : SU->Preds) - if (getSingleUnscheduledSucc(PI.getSUnit()) == SU) + if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) ++NumNodesBlocking; } ResCount += (NumNodesBlocking * ScaleTwo); diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index 70ed123bc898..f269b74fc447 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -1,3 +1,12 @@ +//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + // Pattern fragment that combines the value type and the register class // into a single parameter. @@ -345,7 +354,7 @@ def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; -def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), +def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1), (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; @@ -674,6 +683,8 @@ def I32toI1: OutPatFrag<(ops node:$Rs), defm: Storexm_pat; def: Storexm_simple_pat; +def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), + (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>; def: Pat<(sra I64:$src, u6_0ImmPred:$u6), (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; def: Pat<(srl I64:$src, u6_0ImmPred:$u6), @@ -786,27 +797,19 @@ def: Pat<(i64 (sext_inreg I64:$src1, i16)), def: Pat<(i64 (sext_inreg I64:$src1, i8)), (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; -// We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a J2_jumpf. -def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2), - bb:$offset)>; - -def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset), + (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset), + (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$Pu, bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$Pu, bb:$offset)>; // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)), - bb:$offset)>; +def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>; + // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. @@ -860,15 +863,13 @@ def: Pat<(i1 (setne I1:$src1, I1:$src2)), def: Pat<(i1 (setne I64:$src1, I64:$src2)), (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; -// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setge I32:$src1, I32:$src2)), - (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>; +// rs >= rt -> rt <= rs +def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), + (C4_cmplte I32:$Rt, I32:$Rs)>; -// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) let AddedComplexity = 30 in -def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)), - (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>; +def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), + (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -1634,9 +1635,14 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>; def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; - +def: Pat<(add (mul I32:$Rs, I32:$Rt), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>; def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), @@ -2129,6 +2135,11 @@ let AddedComplexity = 30 in { def: Storea_pat; def: Storea_pat; def: Storea_pat; + def: Storea_pat; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; } let AddedComplexity = 30 in { @@ -2137,6 +2148,19 @@ let AddedComplexity = 30 in { def: Loada_pat; def: Loada_pat; def: Loada_pat; + def: Loada_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; } // Indexed store word - global address. @@ -2707,6 +2731,15 @@ def: Pat<(fneg F64:$Rs), (S2_togglebit_i (HiReg $Rs), 31), isub_hi, (i32 (LoReg $Rs)), isub_lo)>; +def: Pat<(mul I64:$Rss, I64:$Rtt), + (A2_combinew + (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), + (LoReg $Rss), + (HiReg $Rtt)), + (LoReg $Rtt), + (HiReg $Rss)), + (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>; + def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ return isAlignedMemNode(dyn_cast(N)); }]>; diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index 471e32221b29..db268b78cd73 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -13,8 +13,8 @@ #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 14ecf297d351..c757b6ecdd00 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -23,8 +23,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -276,27 +276,27 @@ bool HexagonPassConfig::addInstSelector() { if (!NoOpt) { // Create logical operations on predicate registers. if (EnableGenPred) - addPass(createHexagonGenPredicate(), false); + addPass(createHexagonGenPredicate()); // Rotate loops to expose bit-simplification opportunities. if (EnableLoopResched) - addPass(createHexagonLoopRescheduling(), false); + addPass(createHexagonLoopRescheduling()); // Split double registers. if (!DisableHSDR) addPass(createHexagonSplitDoubleRegs()); // Bit simplification. if (EnableBitSimplify) - addPass(createHexagonBitSimplify(), false); + addPass(createHexagonBitSimplify()); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); // Constant propagation. if (!DisableHCP) { - addPass(createHexagonConstPropagationPass(), false); - addPass(&UnreachableMachineBlockElimID, false); + addPass(createHexagonConstPropagationPass()); + addPass(&UnreachableMachineBlockElimID); } if (EnableGenInsert) - addPass(createHexagonGenInsert(), false); + addPass(createHexagonGenInsert()); if (EnableEarlyIf) - addPass(createHexagonEarlyIfConversion(), false); + addPass(createHexagonEarlyIfConversion()); } return false; @@ -307,9 +307,9 @@ void HexagonPassConfig::addPreRegAlloc() { if (EnableExpandCondsets) insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID); if (!DisableStoreWidening) - addPass(createHexagonStoreWidening(), false); + addPass(createHexagonStoreWidening()); if (!DisableHardwareLoops) - addPass(createHexagonHardwareLoops(), false); + addPass(createHexagonHardwareLoops()); } if (TM->getOptLevel() >= CodeGenOpt::Default) addPass(&MachinePipelinerID); @@ -320,16 +320,16 @@ void HexagonPassConfig::addPostRegAlloc() { if (EnableRDFOpt) addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) - addPass(createHexagonCFGOptimizer(), false); + addPass(createHexagonCFGOptimizer()); if (!DisableAModeOpt) - addPass(createHexagonOptAddrMode(), false); + addPass(createHexagonOptAddrMode()); } } void HexagonPassConfig::addPreSched2() { - addPass(createHexagonCopyToCombine(), false); + addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID, false); + addPass(&IfConverterID); addPass(createHexagonSplitConst32AndConst64()); } @@ -337,17 +337,17 @@ void HexagonPassConfig::addPreEmitPass() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonNewValueJump(), false); + addPass(createHexagonNewValueJump()); - addPass(createHexagonBranchRelaxation(), false); + addPass(createHexagonBranchRelaxation()); // Create Packets. if (!NoOpt) { if (!DisableHardwareLoops) - addPass(createHexagonFixupHwLoops(), false); + addPass(createHexagonFixupHwLoops()); // Generate MUX from pairs of conditional transfers. if (EnableGenMux) - addPass(createHexagonGenMux(), false); + addPass(createHexagonGenMux()); addPass(createHexagonPacketizer(), false); } diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index c9c4f95dbaaa..4dacb1501392 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalObject.h" @@ -28,7 +29,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index fa08afe4019d..7667bfb7a0eb 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,10 +16,10 @@ // prune the dependence. // //===----------------------------------------------------------------------===// +#include "HexagonVLIWPacketizer.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "HexagonVLIWPacketizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 904403543e18..545c8b6b2acd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -12,9 +12,9 @@ #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCCodeEmitter.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index dd790fd41257..1929152129fa 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "HexagonAsmPrinter.h" #include "HexagonInstPrinter.h" +#include "HexagonAsmPrinter.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCAsmInfo.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 70410ff03a64..50f00d1aaeac 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonFixupKinds.h" -#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/Statistic.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 9e1ff9ca35d7..47007e08a2ff 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -29,7 +30,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index aece36790486..b2c7f1569380 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "hexagon-shuffle" +#include "MCTargetDesc/HexagonMCShuffler.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bb98c2bbef6d..1a361548f938 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "Hexagon.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" @@ -27,10 +28,9 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp index 9aa8ad68e07e..60a12dcf2f03 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,9 +9,9 @@ // // RDF-based generic dead code elimination. +#include "RDFDeadCode.h" #include "RDFGraph.h" #include "RDFLiveness.h" -#include "RDFDeadCode.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 7a2895aa4e8c..8d1272370899 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -10,8 +10,8 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). // #include "RDFGraph.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 9d8a3881797b..83e8968086d8 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -23,8 +23,8 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. // -#include "RDFGraph.h" #include "RDFLiveness.h" +#include "RDFGraph.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 1d6c07974beb..72e471f5766e 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -28,8 +28,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/lib/Target/Lanai/LanaiTargetObjectFile.cpp index 7475dbd68ae4..38e75108ba16 100644 --- a/lib/Target/Lanai/LanaiTargetObjectFile.cpp +++ b/lib/Target/Lanai/LanaiTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "LanaiSubtarget.h" #include "LanaiTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index e02bba529bd5..64cd3342ac18 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/LanaiBaseInfo.h" #include "MCTargetDesc/LanaiFixupKinds.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp index 10254677a5ad..c3727416ecb9 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp @@ -19,8 +19,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index a47ff9ff3d61..bcbde2b8b794 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "LanaiMCAsmInfo.h" #include "LanaiMCTargetDesc.h" #include "InstPrinter/LanaiInstPrinter.h" +#include "LanaiMCAsmInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index abf062fe86ae..f39c21fc8aa2 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "InstPrinter/MSP430InstPrinter.h" +#include "MSP430.h" #include "MSP430InstrInfo.h" #include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index d855d3e7f778..694c201cbe8d 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -9,17 +9,18 @@ #include "MCTargetDesc/MipsABIFlagsSection.h" #include "MCTargetDesc/MipsABIInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "MipsTargetStreamer.h" -#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/APFloat.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -40,13 +41,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index ecdf6b0de6e7..b0b994323036 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -17,14 +17,14 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 1e2eb7dbec3e..6d3d4db03603 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// // -#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsAsmBackend.h" +#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 1a1c613cfce0..d116ac3471bc 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -10,13 +10,13 @@ #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index ae3278322311..f658aadff22f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -10,12 +10,12 @@ #include "MipsELFStreamer.h" #include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" using namespace llvm; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index a35eb2a8e03a..0330824fd614 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsMCCodeEmitter.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" -#include "MipsMCCodeEmitter.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index be04480044d4..aad6bf378ea0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -8,14 +8,14 @@ //===----------------------------------------------------------------------===// #include "MipsMCExpr.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index 74d5e4cc9841..2d84528e7469 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "MipsOptionRecord.h" #include "MipsABIInfo.h" #include "MipsELFStreamer.h" -#include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include using namespace llvm; diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 2d4083b27ed1..0cd4aebe4d16 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -11,19 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsTargetStreamer.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsELFStreamer.h" #include "MipsMCExpr.h" #include "MipsMCTargetDesc.h" #include "MipsTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 9615bc38bfce..f24761d7d101 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -185,6 +185,9 @@ def FeatureUseTCCInDIV : SubtargetFeature< "UseTCCInDIV", "false", "Force the assembler to use trapping">; +def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", + "Disable 4-operand madd.fmt and related instructions">; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index a222080f6b81..09e41e1423ae 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsBaseInfo.h" #include "Mips16FrameLowering.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "Mips16InstrInfo.h" #include "MipsInstrInfo.h" #include "MipsRegisterInfo.h" @@ -25,10 +25,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetFrameLowering.h" #include diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 9cdbf510737f..f7ff7c3dc7bb 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "MipsAsmPrinter.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCNaCl.h" #include "Mips.h" -#include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "MipsTargetMachine.h" #include "MipsTargetStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index cb9f676c237a..6a03ee9927d7 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -51,6 +51,22 @@ static bool originalTypeIsF128(const Type *Ty, const char *Func) { return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); } +/// Return true if the original type was vXfXX. +static bool originalEVTTypeIsVectorFloat(EVT Ty) { + if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) + return true; + + return false; +} + +/// Return true if the original type was vXfXX / vXfXX. +static bool originalTypeIsVectorFloat(const Type * Ty) { + if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) + return true; + + return false; +} + MipsCCState::SpecialCallingConvType MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee, const MipsSubtarget &Subtarget) { @@ -78,8 +94,8 @@ void MipsCCState::PreAnalyzeCallResultForF128( } } -/// Identify lowered values that originated from f128 arguments and record -/// this for use by RetCC_MipsN. +/// Identify lowered values that originated from f128 or float arguments and +/// record this for use by RetCC_MipsN. void MipsCCState::PreAnalyzeReturnForF128( const SmallVectorImpl &Outs) { const MachineFunction &MF = getMachineFunction(); @@ -91,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF128( } } -/// Identify lowered values that originated from f128 arguments and record +/// Identify lower values that originated from vXfXX and record /// this. +void MipsCCState::PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl &Ins, const Type *RetTy) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy)); + } +} + +/// Identify lowered values that originated from vXfXX arguments and record +/// this. +void MipsCCState::PreAnalyzeReturnForVectorFloat( + const SmallVectorImpl &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) { + ISD::OutputArg Out = Outs[i]; + OriginalRetWasFloatVector.push_back( + originalEVTTypeIsVectorFloat(Out.ArgVT)); + } +} + +/// Identify lowered values that originated from f128, float and sret to vXfXX +/// arguments and record this. void MipsCCState::PreAnalyzeCallOperands( const SmallVectorImpl &Outs, std::vector &FuncArgs, const char *Func) { for (unsigned i = 0; i < Outs.size(); ++i) { - OriginalArgWasF128.push_back( - originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func)); - OriginalArgWasFloat.push_back( - FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy()); + TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; + + OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); + OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); + OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); CallOperandIsFixed.push_back(Outs[i].IsFixed); } } -/// Identify lowered values that originated from f128 arguments and record -/// this. +/// Identify lowered values that originated from f128, float and vXfXX arguments +/// and record this. void MipsCCState::PreAnalyzeFormalArgumentsForF128( const SmallVectorImpl &Ins) { const MachineFunction &MF = getMachineFunction(); @@ -120,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( if (Ins[i].Flags.isSRet()) { OriginalArgWasF128.push_back(false); OriginalArgWasFloat.push_back(false); + OriginalArgWasFloatVector.push_back(false); continue; } @@ -129,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( OriginalArgWasF128.push_back( originalTypeIsF128(FuncArg->getType(), nullptr)); OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); + + // The MIPS vector ABI exhibits a corner case of sorts or quirk; if the + // first argument is actually an SRet pointer to a vector, then the next + // argument slot is $a2. + OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); } } diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h index 77ecc65b2eee..27901699480b 100644 --- a/lib/Target/Mips/MipsCCState.h +++ b/lib/Target/Mips/MipsCCState.h @@ -45,16 +45,33 @@ private: const char *Func); /// Identify lowered values that originated from f128 arguments and record - /// this. + /// this for use by RetCC_MipsN. void PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl &Ins); + void + PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl &Ins, + const Type *RetTy); + + void PreAnalyzeFormalArgumentsForVectorFloat( + const SmallVectorImpl &Ins); + + void + PreAnalyzeReturnForVectorFloat(const SmallVectorImpl &Outs); + /// Records whether the value has been lowered from an f128. SmallVector OriginalArgWasF128; /// Records whether the value has been lowered from float. SmallVector OriginalArgWasFloat; + /// Records whether the value has been lowered from a floating point vector. + SmallVector OriginalArgWasFloatVector; + + /// Records whether the return value has been lowered from a floating point + /// vector. + SmallVector OriginalRetWasFloatVector; + /// Records whether the value was a fixed argument. /// See ISD::OutputArg::IsFixed, SmallVector CallOperandIsFixed; @@ -78,6 +95,7 @@ public: CCState::AnalyzeCallOperands(Outs, Fn); OriginalArgWasF128.clear(); OriginalArgWasFloat.clear(); + OriginalArgWasFloatVector.clear(); CallOperandIsFixed.clear(); } @@ -96,31 +114,38 @@ public: CCState::AnalyzeFormalArguments(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeCallResult(const SmallVectorImpl &Ins, CCAssignFn Fn, const Type *RetTy, const char *Func) { PreAnalyzeCallResultForF128(Ins, RetTy, Func); + PreAnalyzeCallResultForVectorFloat(Ins, RetTy); CCState::AnalyzeCallResult(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeReturn(const SmallVectorImpl &Outs, CCAssignFn Fn) { PreAnalyzeReturnForF128(Outs); + PreAnalyzeReturnForVectorFloat(Outs); CCState::AnalyzeReturn(Outs, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } bool CheckReturn(const SmallVectorImpl &ArgsFlags, CCAssignFn Fn) { PreAnalyzeReturnForF128(ArgsFlags); + PreAnalyzeReturnForVectorFloat(ArgsFlags); bool Return = CCState::CheckReturn(ArgsFlags, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); return Return; } @@ -128,6 +153,12 @@ public: bool WasOriginalArgFloat(unsigned ValNo) { return OriginalArgWasFloat[ValNo]; } + bool WasOriginalArgVectorFloat(unsigned ValNo) const { + return OriginalArgWasFloatVector[ValNo]; + } + bool WasOriginalRetVectorFloat(unsigned ValNo) const { + return OriginalRetWasFloatVector[ValNo]; + } bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } }; diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index a57cb7badc17..b5df78f89a6b 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -37,6 +37,10 @@ class CCIfOrigArgWasF128 class CCIfArgIsVarArg : CCIf<"!static_cast(&State)->IsCallOperandFixed(ValNo)", A>; +/// Match if the return was a floating point vector. +class CCIfOrigArgWasNotVectorFloat + : CCIf<"!static_cast(&State)" + "->WasOriginalRetVectorFloat(ValNo)", A>; /// Match if the special calling conv is the specified value. class CCIfSpecialCallingConv @@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[ // Promote i1/i8/i16 return values to i32. CCIfType<[i1, i8, i16], CCPromoteToType>, - // i32 are returned in registers V0, V1, A0, A1 - CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>, + // i32 are returned in registers V0, V1, A0, A1, unless the original return + // type was a vector of floats. + CCIfOrigArgWasNotVectorFloat>>, // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index 026f66a1c0e1..ff43a3950610 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -24,10 +24,10 @@ #include "Mips16InstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index ac9a81b1bb2f..c238a65378e2 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -19,6 +19,7 @@ def immZExt4 : ImmLeaf(Imm);}]>; def immZExt8 : ImmLeaf(Imm);}]>; def immZExt10 : ImmLeaf(Imm);}]>; def immSExt6 : ImmLeaf(Imm);}]>; +def immSExt10 : ImmLeaf(Imm);}]>; // Mips-specific dsp nodes def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -851,8 +852,8 @@ class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, uimm8, immZExt8, NoItinerary, DSPROpnd>; -class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, uimm10, - immZExt10, NoItinerary, DSPROpnd>; +class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, simm10, + immSExt10, NoItinerary, DSPROpnd>; class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, NoItinerary, DSPROpnd, GPR32Opnd>; diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index b83f44a74d5b..f79cb0e67200 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsCCState.h" -#include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 760630c41176..f2193013b7aa 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -22,12 +22,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" @@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } +// The MIPS MSA ABI passes vector arguments in the integer register set. +// The number of integer registers used is dependant on the ABI used. +MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (VT.isVector() && Subtarget.hasMSA()) + return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64; + return MipsTargetLowering::getRegisterType(VT); +} + +MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) { + if (Subtarget.isABI_O32()) { + return MVT::i32; + } else { + return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64; + } + } + return MipsTargetLowering::getRegisterType(Context, VT); +} + +unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) + return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)), + 1U); + return MipsTargetLowering::getNumRegisters(Context, VT); +} + +unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + + // Break down vector types to either 2 i64s or 4 i32s. + RegisterVT = getRegisterTypeForCallingConv(Context, VT) ; + IntermediateVT = RegisterVT; + NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits() + ? VT.getVectorNumElements() + : VT.getSizeInBits() / RegisterVT.getSizeInBits(); + + return NumIntermediates; +} + SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); @@ -470,8 +512,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() && !Subtarget.inMicroMipsMode(); - // Disable if we don't generate PIC or the ABI isn't O32. - if (!TM.isPositionIndependent() || !TM.getABI().IsO32()) + // Disable if either of the following is true: + // We do not generate PIC, the ABI is not O32, LargeGOT is being used. + if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT) UseFastISel = false; return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr; @@ -2551,6 +2594,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op, // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is available, shadow it and // go to stack. +// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. +// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} +// with the remainder spilled to the stack. +// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases +// spilling the remainder to the stack. // // For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// @@ -2562,8 +2610,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getMachineFunction().getSubtarget()); static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; + + const MipsCCState * MipsState = static_cast(&State); + static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; + static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 }; + // Do not process byval args here. if (ArgFlags.isByVal()) return true; @@ -2601,8 +2654,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getFirstUnallocated(F32Regs) != ValNo; unsigned OrigAlign = ArgFlags.getOrigAlign(); bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8); + bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo); - if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { + // The MIPS vector ABI for floats passes them in a pair of registers + if (ValVT == MVT::i32 && isVectorFloat) { + // This is the start of an vector that was scalarized into an unknown number + // of components. It doesn't matter how many there are. Allocate one of the + // notional 8 byte aligned registers which map onto the argument stack, and + // shadow the register lost to alignment requirements. + if (ArgFlags.isSplit()) { + Reg = State.AllocateReg(FloatVectorIntRegs); + if (Reg == Mips::A2) + State.AllocateReg(Mips::A1); + else if (Reg == 0) + State.AllocateReg(Mips::A3); + } else { + // If we're an intermediate component of the split, we can just attempt to + // allocate a register directly. + Reg = State.AllocateReg(IntRegs); + } + } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs); // If this is the first part of an i64 arg, // the allocated register must be either A0 or A2. diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2dcafd51061a..0e47ed38f420 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -248,6 +248,33 @@ namespace llvm { bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const override; + + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Return the number of registers for a given MVT, ensuring vectors are + /// treated as a series of gpr sized integers. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Break down vectors to the correct number of gpr sized integers. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + + /// Return the correct alignment for the current calling convention. + virtual unsigned + getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override { + if (ArgTy->isVectorTy()) + return std::min(DL.getABITypeAlignment(ArgTy), 8U); + return DL.getABITypeAlignment(ArgTy); + } + ISD::NodeType getExtendForAtomicOps() const override { return ISD::SIGN_EXTEND; } diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index d81a769d7fd9..94f3a74be98b 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -557,11 +557,11 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>; def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, - MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, - MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, @@ -569,11 +569,11 @@ let AdditionalPredicates = [NoNaNsFPMath] in { } def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, @@ -582,12 +582,12 @@ let AdditionalPredicates = [NoNaNsFPMath] in { let DecoderNamespace = "Mips64" in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; } -let AdditionalPredicates = [NoNaNsFPMath], +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4], DecoderNamespace = "Mips64" in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 8761946b8dbb..40078fb77144 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -238,6 +238,8 @@ def HasEVA : Predicate<"Subtarget->hasEVA()">, AssemblerPredicate<"FeatureEVA,FeatureMips32r2">; def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; +def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, + AssemblerPredicate<"!FeatureMadd4">; //===----------------------------------------------------------------------===// @@ -390,6 +392,10 @@ class ASE_NOT_DSP { list InsnPredicates = [NotDSP]; } +class MADD4 { + list AdditionalPredicates = [HasMadd4]; +} + //===----------------------------------------------------------------------===// class MipsPat : Pat, PredicateControl { diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 63034ecab93b..e01c03db2227 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -40,7 +40,11 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { const TargetRegisterClass *RC = STI.inMips16Mode() ? &Mips::CPU16RegsRegClass - : static_cast(MF.getTarget()) + : STI.inMicroMipsMode() + ? STI.hasMips64() + ? &Mips::GPRMM16_64RegClass + : &Mips::GPRMM16RegClass + : static_cast(MF.getTarget()) .getABI() .IsN64() ? &Mips::GPR64RegClass diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp index 94a1965f9ffb..79c8395d9dcc 100644 --- a/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "Mips.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/ScopedHashTable.h" diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 70ead5cde6fa..7ee45c28a7d0 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instructions.h" #include "Mips.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 65be350f259d..de3389b5a6bf 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" << "spOffset : " << spOffset << "\n" - << "stackSize : " << stackSize << "\n"); + << "stackSize : " << stackSize << "\n" + << "alignment : " + << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n"); eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index e765b4625206..102ebb21609a 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsSEFrameLowering.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSEFrameLowering.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/BitVector.h" diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index c9cf9363b8c9..49ae6dd4cd39 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -24,11 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Dominators.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 1f4e933db2a2..154d5825427b 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "MipsMachineFunction.h" -#include "Mips.h" -#include "MipsRegisterInfo.h" #include "MipsSubtarget.h" +#include "Mips.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -70,7 +70,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), - HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(), + HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index b4d15ee361ff..625a652a0ca0 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -144,6 +144,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasEVA -- supports EVA ASE. bool HasEVA; + + // nomadd4 - disables generation of 4-operand madd.s, madd.d and + // related instructions. + bool DisableMadd4; InstrItineraryData InstrItins; @@ -253,6 +257,7 @@ public: bool hasDSPR2() const { return HasDSPR2; } bool hasDSPR3() const { return HasDSPR3; } bool hasMSA() const { return HasMSA; } + bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } bool useSmallSection() const { return UseSmallSection; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index a9d6ab055892..330ae19ecd0f 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "Mips.h" @@ -18,7 +19,6 @@ #include "MipsSEISelDAGToDAG.h" #include "MipsSubtarget.h" #include "MipsTargetObjectFile.h" -#include "MipsTargetMachine.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index c5d6a05d6611..4d73c3991035 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 58cb7793d040..0139646fc3f7 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXAsmPrinter.h" #include "InstPrinter/NVPTXInstPrinter.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" -#include "NVPTXAsmPrinter.h" #include "NVPTXMCExpr.h" #include "NVPTXMachineFunctionInfo.h" #include "NVPTXRegisterInfo.h" @@ -73,8 +73,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 390776212ce7..916b0e115664 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "MCTargetDesc/NVPTXBaseInfo.h" +#include "NVPTX.h" #include "NVPTXUtilities.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constants.h" diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index ebaaf42bc64e..f26b9a7cb8dd 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXISelLowering.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" -#include "NVPTXISelLowering.h" #include "NVPTXSection.h" #include "NVPTXSubtarget.h" #include "NVPTXTargetMachine.h" diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 0f6c2e53e60a..da563f0531d4 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "NVPTXInstrInfo.h" +#include "NVPTX.h" #include "NVPTXTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp index e858b37e1843..139dc7fbeeda 100644 --- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -90,8 +90,8 @@ //===----------------------------------------------------------------------===// #include "NVPTX.h" -#include "NVPTXUtilities.h" #include "NVPTXTargetMachine.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp index e10b046f7c97..4e902c0fb507 100644 --- a/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -36,8 +36,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 8dfbfece9b8e..2b6ba8c85d4d 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXTargetMachine.h" #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" -#include "NVPTXTargetMachine.h" #include "NVPTXTargetObjectFile.h" #include "NVPTXTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp index 9c71a2ee165b..11277f5ba596 100644 --- a/lib/Target/NVPTX/NVVMIntrRange.cpp +++ b/lib/Target/NVPTX/NVVMIntrRange.cpp @@ -15,8 +15,8 @@ #include "NVPTX.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 84bb9ec56800..baf5902ddf58 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "PPCInstPrinter.h" -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCInstrInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 4863ac542736..028c2cb562f8 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,8 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -18,9 +20,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index fd279c60f3f5..1488bd5b0be6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 6b97d4c1456b..54f664314578 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "PPCFixupKinds.h" #include "PPCMCExpr.h" +#include "PPCFixupKinds.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2d686f227919..e8f220ea5457 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" @@ -30,11 +31,10 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 1f38a8c947e7..6d591ca964a6 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -18,7 +19,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; @@ -151,7 +151,7 @@ static void makeRelocationInfo(MachO::any_relocation_info &MRE, // The bitfield offsets that work (as determined by trial-and-error) // are different than what is documented in the mach-o manuals. // This appears to be an endianness issue; reversing the order of the - // documented bitfields in fixes this (but + // documented bitfields in fixes this (but // breaks x86/ARM assembly). MRE.r_word1 = ((Index << 8) | // was << 0 (IsPCRel << 7) | // was << 24 @@ -222,7 +222,7 @@ bool PPCMachObjectWriter::recordScatteredRelocation( report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); - // FIXME: is Type correct? see include/llvm/Support/MachO.h + // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1f181d007f63..841b8c514464 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" -#include "PPCInstrInfo.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCExpr.h" #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "PPC.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" @@ -29,6 +29,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -55,11 +57,9 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include #include diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 93c201d03869..55e105dad0e5 100644 --- a/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This file implements converting i1 values to i32 if they could be more +// This file implements converting i1 values to i32/i64 if they could be more // profitably allocated as GPRs rather than CRs. This pass will become totally // unnecessary if Register Bank Allocation and Global Instruction Selection ever // go upstream. // -// Presently, the pass converts i1 Constants, and Arguments to i32 if the +// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the // transitive closure of their uses includes only PHINodes, CallInsts, and // ReturnInsts. The rational is that arguments are generally passed and returned -// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will +// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will // actually save casts at the Machine Instruction level. // // It might be useful to expand this pass to add bit-wise operations to the list @@ -33,11 +33,12 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -50,8 +51,9 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include "llvm/Pass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Casting.h" #include using namespace llvm; @@ -87,17 +89,19 @@ class PPCBoolRetToInt : public FunctionPass { return Defs; } - // Translate a i1 value to an equivalent i32 value: - static Value *translate(Value *V) { - Type *Int32Ty = Type::getInt32Ty(V->getContext()); + // Translate a i1 value to an equivalent i32/i64 value: + Value *translate(Value *V) { + Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext()) + : Type::getInt32Ty(V->getContext()); + if (auto *C = dyn_cast(V)) - return ConstantExpr::getZExt(C, Int32Ty); + return ConstantExpr::getZExt(C, IntTy); if (auto *P = dyn_cast(V)) { // Temporarily set the operands to 0. We'll fix this later in // runOnUse. - Value *Zero = Constant::getNullValue(Int32Ty); + Value *Zero = Constant::getNullValue(IntTy); PHINode *Q = - PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P); for (unsigned i = 0; i < P->getNumOperands(); ++i) Q->addIncoming(Zero, P->getIncomingBlock(i)); return Q; @@ -109,7 +113,7 @@ class PPCBoolRetToInt : public FunctionPass { auto InstPt = A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode(); - return new ZExtInst(V, Int32Ty, "", InstPt); + return new ZExtInst(V, IntTy, "", InstPt); } typedef SmallPtrSet PHINodeSet; @@ -185,6 +189,13 @@ class PPCBoolRetToInt : public FunctionPass { if (skipFunction(F)) return false; + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + auto &TM = TPC->getTM(); + ST = TM.getSubtargetImpl(F); + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); B2IMap Bool2IntMap; bool Changed = false; @@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass { return Changed; } - static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, + bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, B2IMap &BoolToIntMap) { auto Defs = findAllDefs(U); @@ -262,13 +273,16 @@ class PPCBoolRetToInt : public FunctionPass { AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); } + +private: + const PPCSubtarget *ST; }; } // end anonymous namespace char PPCBoolRetToInt::ID = 0; INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", - "Convert i1 constants to i32 if they are returned", + "Convert i1 constants to i32/i64 if they are returned", false, false) FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index b7d3154d0000..d0b66f9bca09 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCSubtarget.h" diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 70c4170653ae..24bc027f8106 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -23,7 +23,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "PPC.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" @@ -43,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp index 6bd229625fc3..811e4dd9dfe1 100644 --- a/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 2fc8654deeab..bc9957194f6d 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" -#include "PPCCallingConv.h" +#include "PPC.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 54414457388d..28d496ee9ca1 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -21,9 +21,10 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -54,7 +55,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/Statistic.h" #include #include #include @@ -2824,6 +2824,20 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } + case ISD::SETNE: { + // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) + // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), + getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + } } } @@ -2850,6 +2864,27 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, getI32Imm(63, dl)), 0); } + case ISD::SETNE: { + // Bitwise xor the operands, count leading zeros, shift right by 5 bits and + // flip the bit, finally take 2's complement. + // (sext (setcc %a, %b, setne)) -> + // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) + // Same as above, but the first xor is not needed. + // (sext (setcc %a, 0, setne)) -> + // (neg (xor (lshr (ctlz %a), 5), 1)) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = + { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + SDValue Xori = + SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); + } } } @@ -2872,6 +2907,19 @@ SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, getI64Imm(58, dl), getI64Imm(63, dl)), 0); } + case ISD::SETNE: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue AC = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + Xor, getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, + Xor, AC.getValue(1)), 0); + } } } @@ -2896,6 +2944,19 @@ SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, Addic, Addic.getValue(1)), 0); } + case ISD::SETNE: { + // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) + // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) + // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) + // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue SC = + SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, + Xor, getI32Imm(0, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, + SC, SC.getValue(1)), 0); + } } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 41ff9d903aa0..bda4e5e81734 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" -#include "PPCCallingConv.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" -#include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" @@ -28,11 +28,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -52,8 +52,8 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -61,9 +61,9 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index c4139ca8b7bd..e214d26c063b 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -2717,6 +2717,40 @@ def DblToFlt { dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); } + +def ByteToWord { + dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); + dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); + dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); + dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); +} + +def ByteToDWord { + dag A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); + dag A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); +} + +def HWordToWord { + dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); + dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); + dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); + dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); +} + +def HWordToDWord { + dag A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); + dag A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); +} + +def WordToDWord { + dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); + dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); +} + def FltToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); } @@ -2969,4 +3003,21 @@ let AddedComplexity = 400 in { (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC), (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>; } + // P9 Altivec instructions that can be used to build vectors. + // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete + // with complexities of existing build vector patterns in this file. + let Predicates = [HasP9Altivec] in { + def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)), + (v2i64 (VEXTSW2D $A))>; + def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)), + (v2i64 (VEXTSH2D $A))>; + def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1, + HWordToWord.A2, HWordToWord.A3)), + (v4i32 (VEXTSH2W $A))>; + def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1, + ByteToWord.A2, ByteToWord.A3)), + (v4i32 (VEXTSB2W $A))>; + def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)), + (v2i64 (VEXTSB2D $A))>; + } } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 541b98e01b99..b310493587ae 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "PPC.h" #include "PPCSubtarget.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index c6d2c3ebcc0f..ff5f17c7628f 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -19,9 +19,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index c7aa4cb78b7a..31c50785c2ee 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -21,9 +21,9 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 7c53a5601790..17345b6ca8d3 100644 --- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -61,8 +61,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index b9004cc8a9f5..5a226b23ff96 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetObjectFile.h" -#include "PPCTargetMachine.h" #include "PPCTargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index f3a0290da054..93fe3230ab81 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f6d20ced15a0..a57484e5abdf 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index d3434b77be8a..491eaf326a50 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -42,9 +42,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index d6f2672271e9..d9a71893afee 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -12,10 +12,10 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index b2ed13758d41..9309d493cef4 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -13,13 +13,13 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index ddc3bf350452..7c98b1c8f321 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H +#include "llvm/Config/config.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Config/config.h" namespace llvm { class MCAsmBackend; diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp index efdde04c582d..744d7b8aaa3a 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -13,10 +13,10 @@ #include "RISCVTargetMachine.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 7e6dff6b7894..087c037614a9 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/SparcMCExpr.h" #include "MCTargetDesc/SparcMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" @@ -28,8 +28,8 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index da7e0b737e78..8e298e8316da 100644 --- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -14,11 +14,11 @@ #include "Sparc.h" #include "SparcRegisterInfo.h" #include "SparcSubtarget.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index cc07547ede2c..d1d1334163a2 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/SparcFixupKinds.h" #include "MCTargetDesc/SparcMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 21df60237d96..50e8825b15e8 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -14,10 +14,10 @@ #include "SparcMCAsmInfo.h" #include "SparcMCExpr.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index e85a8cd5e339..a77f760d9eff 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -19,7 +19,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Object/ELF.h" - using namespace llvm; #define DEBUG_TYPE "sparcmcexpr" diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 31a128a5f271..19fb94534b25 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "InstPrinter/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" #include "SparcTargetStreamer.h" diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp index a3cedcbf9dd1..a784124ff688 100644 --- a/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/lib/Target/Sparc/SparcMCInstLower.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 49c67e0819f7..c7a1ca262d2c 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -11,9 +11,9 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" -#include "SparcTargetObjectFile.h" -#include "Sparc.h" #include "LeonPasses.h" +#include "Sparc.h" +#include "SparcTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp index 8fdde15d8d27..627e49a95f3c 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -9,8 +9,8 @@ #include "SparcTargetObjectFile.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index efcf6696fd50..ad05779a9f64 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 23b7d5b5d501..fd1fd7bc40dc 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 3de570bf30cc..df0a8161e6e7 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -9,11 +9,11 @@ #include "MCTargetDesc/SystemZMCFixups.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h index 8fa54ee434cf..0c755c9ad1b9 100644 --- a/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -25,10 +25,10 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H #include "SystemZSubtarget.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index ae141dbcad34..ac4c3f6db684 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -5367,12 +5367,24 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { if (Invert) CCMask ^= CCValid; + + // ISel pattern matching also adds a load memory operand of the same + // address, so take special care to find the storing memory operand. + MachineMemOperand *MMO = nullptr; + for (auto *I : MI.memoperands()) + if (I->isStore()) { + MMO = I; + break; + } + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg) - .add(Base) - .addImm(Disp) - .addImm(CCValid) - .addImm(CCMask); + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addImm(CCValid) + .addImm(CCMask) + .addMemOperand(MMO); + MI.eraseFromParent(); return MBB; } @@ -5950,7 +5962,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( .addImm(DestDisp) .addImm(ThisLength) .add(SrcBase) - .addImm(SrcDisp); + .addImm(SrcDisp) + ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index b34c181124de..66a5ff12be46 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "SystemZInstrInfo.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "SystemZInstrBuilder.h" -#include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index ec8ce6e911fa..3a0e01da42f0 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "SystemZTargetMachine.h" #include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 6ef8000d6f43..d14a0fb0b0b2 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" +#include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 263aff8b7bfb..7391df8342ef 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 36e51921bf2f..be480f03c572 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -19,8 +19,8 @@ #include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetSubtargetInfo.h" #include diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp index 96a9ef82c125..5dbd23d420a3 100644 --- a/lib/Target/SystemZ/SystemZTDC.cpp +++ b/lib/Target/SystemZ/SystemZTDC.cpp @@ -47,10 +47,10 @@ #include "SystemZ.h" #include "llvm/ADT/MapVector.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index f30d52f859d7..cb81c0e5276e 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "SystemZTargetMachine.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "SystemZMachineScheduler.h" -#include "SystemZTargetMachine.h" #include "SystemZTargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 5d1616d03779..42d92622d6c8 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -14,12 +14,12 @@ #include "llvm-c/Target.h" #include "llvm-c/Initialization.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include using namespace llvm; diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 91cc97e38b3d..f941891f3183 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -24,7 +25,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" @@ -240,6 +240,20 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal( if (GO->hasSection()) return getExplicitSectionGlobal(GO, Kind, TM); + if (auto *GVar = dyn_cast(GO)) { + auto Attrs = GVar->getAttributes(); + if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) || + (Attrs.hasAttribute("data-section") && Kind.isData()) || + (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) { + return getExplicitSectionGlobal(GO, Kind, TM); + } + } + + if (auto *F = dyn_cast(GO)) { + if (F->hasFnAttribute("implicit-section-name")) + return getExplicitSectionGlobal(GO, Kind, TM); + } + // Use default section depending on the 'type' of global return SelectSectionForGlobal(GO, Kind, TM); } diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index df12e0e88e3b..01f14939864f 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/TargetMachine.h" #include "llvm-c/Core.h" #include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CodeGenCWrappers.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index b5f53114d3e1..9be11da9afac 100644 --- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index c6158720d62f..b1de84d7e8e6 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,9 +16,9 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 7c78285fbda4..4f20096c1583 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index 544cd653fd72..c56c591def36 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 795658ca96b4..0ba700a86b74 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -15,9 +15,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 68d6747298df..ddf964e7dbb7 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -16,9 +16,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 2846ec5e9337..27c01cb8acf7 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -13,14 +13,14 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCWasmObjectWriter.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Wasm.h" using namespace llvm; namespace { @@ -54,6 +54,11 @@ static bool IsFunctionExpr(const MCExpr *Expr) { return false; } +static bool IsFunctionType(const MCValue &Target) { + const MCSymbolRefExpr *RefA = Target.getSymA(); + return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX; +} + unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, @@ -71,6 +76,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, case WebAssembly::fixup_code_sleb128_i64: llvm_unreachable("fixup_sleb128_i64 not implemented yet"); case WebAssembly::fixup_code_uleb128_i32: + if (IsFunctionType(Target)) + return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB; if (IsFunction) return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB; return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB; diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 40e1928197bc..1691808d05a0 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -17,8 +17,8 @@ /// ////===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/PriorityQueue.h" diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index bd11d1b46906..21e0f6b23777 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -18,8 +18,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp index bc6360aafd61..b2330a232093 100644 --- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -22,8 +22,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 53698ff09b10..09338a4898e0 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -16,8 +16,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 2bbf7a2b42f9..41f315c2825b 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -26,8 +26,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/PriorityQueue.h" diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 257f1d110aa2..4f3ae57733e5 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" // To access function attributes. diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 744a3ed427af..576b71dd7966 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 473dcb7a33fd..1462c49aa9fd 100644 --- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index e3470825940c..766ab456a8e6 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 57d454746b06..6650191807dc 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -20,8 +20,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 9e944df637d9..878ffd08d228 100644 --- a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index 2441ead7cb27..b1385f409fd3 100644 --- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index a9aa781610ce..8173364fa880 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -24,8 +24,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/TargetLibraryInfo.h" diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index d9b2b8743649..7b05f671bdcb 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -12,9 +12,9 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyTargetMachine.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetObjectFile.h" #include "WebAssemblyTargetTransformInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 788fac62626b..f7e31de65f6d 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86AsmInstrumentation.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86Operand.h" -#include "llvm/ADT/Twine.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index 33eff14b8215..0fba15cc692c 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -15,8 +15,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 7471373334f6..fc4adddc149b 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -11,7 +11,6 @@ tablegen(LLVM X86GenFastISel.inc -gen-fast-isel) tablegen(LLVM X86GenCallingConv.inc -gen-callingconv) tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables) -tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables) if(LLVM_BUILD_GLOBAL_ISEL) tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank) tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 36ad23bb41c0..4ce908b1da64 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -74,8 +74,8 @@ // //===----------------------------------------------------------------------===// -#include "X86DisassemblerDecoder.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index b7f637e9a8cd..577b7a776c6d 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include /* for va_*() */ -#include /* for vsnprintf() */ -#include /* for exit() */ -#include /* for memset() */ +#include /* for va_*() */ +#include /* for vsnprintf() */ +#include /* for exit() */ +#include /* for memset() */ #include "X86DisassemblerDecoder.h" diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 6aa700306744..4d91300c7ede 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86ATTInstPrinter.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index b5a926f915af..5e809c34325e 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -15,8 +15,8 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "Utils/X86ShuffleDecode.h" -#include "llvm/MC/MCInst.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index a8c631ae282f..d6af6712d5a1 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "X86IntelInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" -#include "X86IntelInstPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index a713af6aadb5..7a9e4f4468ec 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -10,6 +10,8 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -22,9 +24,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 0b73df3a2ff8..4da4eebec038 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 9c35a251e480..1538a515f419 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -13,12 +13,12 @@ #include "X86MCAsmInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" using namespace llvm; enum AsmWriterFlavorTy { diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 297926ddcfda..4097ef224d50 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -19,7 +20,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index d6777fc8aa6a..105580c913a1 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -9,11 +9,11 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index e1825ca1eda1..dc15aeadaa61 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineValueType.h" @@ -34,7 +35,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3cfb924abd01..621505aaded9 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -414,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; else @@ -424,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; else @@ -437,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; else diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 328a80304602..2777fa89330f 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -29,8 +29,8 @@ #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOptions.h" #include using namespace llvm; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5303d7a406ad..831e9bdab0e1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1,4 +1,4 @@ - + //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure @@ -81,6 +81,12 @@ static cl::opt ExperimentalPrefLoopAlignment( " of the loop header PC will be 0)."), cl::Hidden); +static cl::opt MulConstantOptimization( + "mul-constant-optimization", cl::init(true), + cl::desc("Replace 'mul x, Const' with more effective instructions like " + "SHIFT, LEA, etc."), + cl::Hidden); + /// Call this when the user attempts to do something unsupported, like /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike /// report_fatal_error, so calling code should attempt to recover without @@ -5810,7 +5816,8 @@ static bool setTargetShuffleZeroElements(SDValue N, // The decoded shuffle mask may contain a different number of elements to the // destination value type. static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, - SmallVectorImpl &Ops) { + SmallVectorImpl &Ops, + SelectionDAG &DAG) { Mask.clear(); Ops.clear(); @@ -5868,8 +5875,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8); } - if (!SrcExtract || !isa(SrcExtract.getOperand(1)) || - NumElts <= SrcExtract.getConstantOperandVal(1)) + if (!SrcExtract || !isa(SrcExtract.getOperand(1))) return false; SDValue SrcVec = SrcExtract.getOperand(0); @@ -5877,8 +5883,12 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, unsigned NumSrcElts = SrcVT.getVectorNumElements(); unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1; + unsigned SrcIdx = SrcExtract.getConstantOperandVal(1); + if (NumSrcElts <= SrcIdx) + return false; + Ops.push_back(SrcVec); - Mask.push_back(SrcExtract.getConstantOperandVal(1)); + Mask.push_back(SrcIdx); Mask.append(NumZeros, SM_SentinelZero); Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef); return true; @@ -5915,6 +5925,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } + case X86ISD::PACKSS: { + // If we know input saturation won't happen we can treat this + // as a truncation shuffle. + if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt || + DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt) + return false; + + Ops.push_back(N.getOperand(0)); + Ops.push_back(N.getOperand(1)); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(i * 2); + return true; + } case X86ISD::VSHLI: case X86ISD::VSRLI: { uint64_t ShiftVal = N.getConstantOperandVal(1); @@ -5989,9 +6012,10 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, /// Returns true if the target shuffle mask was decoded. static bool resolveTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, - SmallVectorImpl &Mask) { + SmallVectorImpl &Mask, + SelectionDAG &DAG) { if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) - if (!getFauxShuffleMask(Op, Mask, Inputs)) + if (!getFauxShuffleMask(Op, Mask, Inputs, DAG)) return false; resolveTargetShuffleInputsAndMask(Inputs, Mask); @@ -6391,6 +6415,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, /// Example: -> zextload a static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, const SDLoc &DL, SelectionDAG &DAG, + const X86Subtarget &Subtarget, bool isAfterLegalize) { unsigned NumElems = Elts.size(); @@ -6495,6 +6520,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); + // Don't create 256-bit non-temporal aligned loads without AVX2 as these + // will lower to regular temporal loads and use the cache. + if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && + VT.is256BitVector() && !Subtarget.hasInt256()) + return SDValue(); + if (IsConsecutiveLoad) return CreateLoad(VT, LDBase); @@ -7701,7 +7732,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // See if we can use a vector load to get all of the elements. if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) { SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElems); - if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) return LD; } @@ -7825,24 +7857,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } // Next, we iteratively mix elements, e.g. for v4f32: - // Step 1: unpcklps 0, 2 ==> X: - // : unpcklps 1, 3 ==> Y: - // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - unsigned EltStride = NumElems >> 1; - while (EltStride != 0) { - for (unsigned i = 0; i < EltStride; ++i) { - // If Ops[i+EltStride] is undef and this is the first round of mixing, - // then it is safe to just drop this shuffle: V[i] is already in the - // right place, the one element (since it's the first round) being - // inserted as undef can be dropped. This isn't safe for successive - // rounds because they will permute elements within both vectors. - if (Ops[i+EltStride].isUndef() && - EltStride == NumElems/2) - continue; + // Step 1: unpcklps 0, 1 ==> X: + // : unpcklps 2, 3 ==> Y: + // Step 2: unpcklpd X, Y ==> <3, 2, 1, 0> + for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { + // Generate scaled UNPCKL shuffle mask. + SmallVector Mask; + for(unsigned i = 0; i != Scale; ++i) + Mask.push_back(i); + for (unsigned i = 0; i != Scale; ++i) + Mask.push_back(NumElems+i); + Mask.append(NumElems - Mask.size(), SM_SentinelUndef); - Ops[i] = getUnpackl(DAG, dl, VT, Ops[i], Ops[i + EltStride]); - } - EltStride >>= 1; + for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) + Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); } return Ops[0]; } @@ -17177,7 +17205,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, Cond == ISD::SETGE || Cond == ISD::SETUGE; bool Invert = Cond == ISD::SETNE || (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); - bool FlipSigns = ISD::isUnsignedIntSetCC(Cond); + + // If both operands are known non-negative, then an unsigned compare is the + // same as a signed compare and there's no need to flip signbits. + // TODO: We could check for more general simplifications here since we're + // computing known bits. + bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) && + !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1)); // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); @@ -26741,6 +26775,17 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( return Tmp; } + case X86ISD::VSHLI: { + SDValue Src = Op.getOperand(0); + unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); + APInt ShiftVal = cast(Op.getOperand(1))->getAPIntValue(); + if (ShiftVal.uge(VTBits)) + return VTBits; // Shifted all bits out --> zero. + if (ShiftVal.uge(Tmp)) + return 1; // Shifted all sign bits out --> unknown. + return Tmp - ShiftVal.getZExtValue(); + } + case X86ISD::VSRAI: { SDValue Src = Op.getOperand(0); unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); @@ -27889,7 +27934,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, // Extract target shuffle mask and resolve sentinels and inputs. SmallVector OpMask; SmallVector OpInputs; - if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask)) + if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG)) return false; assert(OpInputs.size() <= 2 && "Too many shuffle inputs"); @@ -28788,7 +28833,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, } if (Elts.size() == VT.getVectorNumElements()) - if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true)) return LD; // For AVX2, we sometimes want to combine @@ -29430,7 +29476,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; - if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask)) + if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. @@ -31017,6 +31063,77 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, } } +static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, + EVT VT, SDLoc DL) { + + auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(Mult, DL, VT)); + Result = DAG.getNode(ISD::SHL, DL, VT, Result, + DAG.getConstant(Shift, DL, MVT::i8)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + auto combineMulMulAddOrSub = [&](bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(9, DL, VT)); + Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + switch (MulAmt) { + default: + break; + case 11: + // mul x, 11 => add ((shl (mul x, 5), 1), x) + return combineMulShlAddOrSub(5, 1, /*isAdd*/ true); + case 21: + // mul x, 21 => add ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ true); + case 22: + // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(5, 2, /*isAdd*/ true)); + case 19: + // mul x, 19 => sub ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ false); + case 13: + // mul x, 13 => add ((shl (mul x, 3), 2), x) + return combineMulShlAddOrSub(3, 2, /*isAdd*/ true); + case 23: + // mul x, 13 => sub ((shl (mul x, 3), 3), x) + return combineMulShlAddOrSub(3, 3, /*isAdd*/ false); + case 14: + // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(3, 2, /*isAdd*/ true)); + case 26: + // mul x, 26 => sub ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ false); + case 28: + // mul x, 28 => add ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ true); + case 29: + // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulMulAddOrSub(/*isAdd*/ true)); + case 30: + // mul x, 30 => sub (sub ((shl x, 5), x), x) + return DAG.getNode( + ISD::SUB, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(5, DL, MVT::i8)), + N->getOperand(0)), + N->getOperand(0)); + } + return SDValue(); +} + /// Optimize a single multiply with constant into two operations in order to /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA. static SDValue combineMul(SDNode *N, SelectionDAG &DAG, @@ -31026,6 +31143,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalize() && VT.isVector()) return reduceVMULWidth(N, DAG, Subtarget); + if (!MulConstantOptimization) + return SDValue(); // An imul is usually smaller than the alternative sequence. if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); @@ -31081,7 +31200,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); - } + } else if (!Subtarget.slowLEA()) + NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL); if (!NewMul) { assert(MulAmt != 0 && @@ -32381,15 +32501,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // For chips with slow 32-byte unaligned loads, break the 32-byte operation - // into two 16-byte operations. + // into two 16-byte operations. Also split non-temporal aligned loads on + // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && - TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, - AddressSpace, Alignment, &Fast) && !Fast) { + ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) || + (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, + AddressSpace, Alignment, &Fast) && !Fast))) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -35097,7 +35219,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), OpVT, AS, Alignment, &Fast) && Fast) { SDValue Ops[] = {SubVec2, SubVec}; - if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) + if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, + Subtarget, false)) return Ld; } } diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index e2e228f5544b..5224a16613cb 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -641,22 +641,37 @@ def sdmem : Operand { // SSE pattern fragments //===----------------------------------------------------------------------===// +// Vector load wrappers to prevent folding of non-temporal aligned loads on +// supporting targets. +def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasSSE41() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 16; +}]>; +def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX2() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 32; +}]>; +def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX512() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 64; +}]>; + // 128-bit load pattern fragments // NOTE: all 128-bit integer vector loads are promoted to v2i64 -def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; -def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; -def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; +def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>; +def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>; +def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>; // 256-bit load pattern fragments // NOTE: all 256-bit integer vector loads are promoted to v4i64 -def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; -def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; -def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>; // 512-bit load pattern fragments -def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; -def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; -def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; +def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>; +def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>; +def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>; // 128-/256-/512-bit extload pattern fragments def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; @@ -728,9 +743,13 @@ def alignedloadv8i64 : PatFrag<(ops node:$ptr), // allows unaligned accesses, match any load, though this may require // setting a feature bit in the processor (on startup, for example). // Opteron 10h and later implement such a feature. +// Avoid non-temporal aligned loads on supported targets. def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return Subtarget->hasSSEUnalignedMem() - || cast(N)->getAlignment() >= 16; + return (Subtarget->hasSSEUnalignedMem() || + cast(N)->getAlignment() >= 16) && + (!Subtarget->hasSSE41() || + !(cast(N)->getAlignment() >= 16 && + cast(N)->isNonTemporal())); }]>; // 128-bit memop pattern fragments diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0aee30081a35..ff5d90c4e78b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -121,8 +121,172 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) (STI.is64Bit() ? X86::RETQ : X86::RETL)), Subtarget(STI), RI(STI.getTargetTriple()) { -// Generated memory folding tables. -#include "X86GenFoldTables.inc" + static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { + { X86::ADC32ri, X86::ADC32mi, 0 }, + { X86::ADC32ri8, X86::ADC32mi8, 0 }, + { X86::ADC32rr, X86::ADC32mr, 0 }, + { X86::ADC64ri32, X86::ADC64mi32, 0 }, + { X86::ADC64ri8, X86::ADC64mi8, 0 }, + { X86::ADC64rr, X86::ADC64mr, 0 }, + { X86::ADD16ri, X86::ADD16mi, 0 }, + { X86::ADD16ri8, X86::ADD16mi8, 0 }, + { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE }, + { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE }, + { X86::ADD16rr, X86::ADD16mr, 0 }, + { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE }, + { X86::ADD32ri, X86::ADD32mi, 0 }, + { X86::ADD32ri8, X86::ADD32mi8, 0 }, + { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE }, + { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32mr, 0 }, + { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE }, + { X86::ADD64ri32, X86::ADD64mi32, 0 }, + { X86::ADD64ri8, X86::ADD64mi8, 0 }, + { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE }, + { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64mr, 0 }, + { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, + { X86::ADD8ri, X86::ADD8mi, 0 }, + { X86::ADD8rr, X86::ADD8mr, 0 }, + { X86::AND16ri, X86::AND16mi, 0 }, + { X86::AND16ri8, X86::AND16mi8, 0 }, + { X86::AND16rr, X86::AND16mr, 0 }, + { X86::AND32ri, X86::AND32mi, 0 }, + { X86::AND32ri8, X86::AND32mi8, 0 }, + { X86::AND32rr, X86::AND32mr, 0 }, + { X86::AND64ri32, X86::AND64mi32, 0 }, + { X86::AND64ri8, X86::AND64mi8, 0 }, + { X86::AND64rr, X86::AND64mr, 0 }, + { X86::AND8ri, X86::AND8mi, 0 }, + { X86::AND8rr, X86::AND8mr, 0 }, + { X86::DEC16r, X86::DEC16m, 0 }, + { X86::DEC32r, X86::DEC32m, 0 }, + { X86::DEC64r, X86::DEC64m, 0 }, + { X86::DEC8r, X86::DEC8m, 0 }, + { X86::INC16r, X86::INC16m, 0 }, + { X86::INC32r, X86::INC32m, 0 }, + { X86::INC64r, X86::INC64m, 0 }, + { X86::INC8r, X86::INC8m, 0 }, + { X86::NEG16r, X86::NEG16m, 0 }, + { X86::NEG32r, X86::NEG32m, 0 }, + { X86::NEG64r, X86::NEG64m, 0 }, + { X86::NEG8r, X86::NEG8m, 0 }, + { X86::NOT16r, X86::NOT16m, 0 }, + { X86::NOT32r, X86::NOT32m, 0 }, + { X86::NOT64r, X86::NOT64m, 0 }, + { X86::NOT8r, X86::NOT8m, 0 }, + { X86::OR16ri, X86::OR16mi, 0 }, + { X86::OR16ri8, X86::OR16mi8, 0 }, + { X86::OR16rr, X86::OR16mr, 0 }, + { X86::OR32ri, X86::OR32mi, 0 }, + { X86::OR32ri8, X86::OR32mi8, 0 }, + { X86::OR32rr, X86::OR32mr, 0 }, + { X86::OR64ri32, X86::OR64mi32, 0 }, + { X86::OR64ri8, X86::OR64mi8, 0 }, + { X86::OR64rr, X86::OR64mr, 0 }, + { X86::OR8ri, X86::OR8mi, 0 }, + { X86::OR8rr, X86::OR8mr, 0 }, + { X86::ROL16r1, X86::ROL16m1, 0 }, + { X86::ROL16rCL, X86::ROL16mCL, 0 }, + { X86::ROL16ri, X86::ROL16mi, 0 }, + { X86::ROL32r1, X86::ROL32m1, 0 }, + { X86::ROL32rCL, X86::ROL32mCL, 0 }, + { X86::ROL32ri, X86::ROL32mi, 0 }, + { X86::ROL64r1, X86::ROL64m1, 0 }, + { X86::ROL64rCL, X86::ROL64mCL, 0 }, + { X86::ROL64ri, X86::ROL64mi, 0 }, + { X86::ROL8r1, X86::ROL8m1, 0 }, + { X86::ROL8rCL, X86::ROL8mCL, 0 }, + { X86::ROL8ri, X86::ROL8mi, 0 }, + { X86::ROR16r1, X86::ROR16m1, 0 }, + { X86::ROR16rCL, X86::ROR16mCL, 0 }, + { X86::ROR16ri, X86::ROR16mi, 0 }, + { X86::ROR32r1, X86::ROR32m1, 0 }, + { X86::ROR32rCL, X86::ROR32mCL, 0 }, + { X86::ROR32ri, X86::ROR32mi, 0 }, + { X86::ROR64r1, X86::ROR64m1, 0 }, + { X86::ROR64rCL, X86::ROR64mCL, 0 }, + { X86::ROR64ri, X86::ROR64mi, 0 }, + { X86::ROR8r1, X86::ROR8m1, 0 }, + { X86::ROR8rCL, X86::ROR8mCL, 0 }, + { X86::ROR8ri, X86::ROR8mi, 0 }, + { X86::SAR16r1, X86::SAR16m1, 0 }, + { X86::SAR16rCL, X86::SAR16mCL, 0 }, + { X86::SAR16ri, X86::SAR16mi, 0 }, + { X86::SAR32r1, X86::SAR32m1, 0 }, + { X86::SAR32rCL, X86::SAR32mCL, 0 }, + { X86::SAR32ri, X86::SAR32mi, 0 }, + { X86::SAR64r1, X86::SAR64m1, 0 }, + { X86::SAR64rCL, X86::SAR64mCL, 0 }, + { X86::SAR64ri, X86::SAR64mi, 0 }, + { X86::SAR8r1, X86::SAR8m1, 0 }, + { X86::SAR8rCL, X86::SAR8mCL, 0 }, + { X86::SAR8ri, X86::SAR8mi, 0 }, + { X86::SBB32ri, X86::SBB32mi, 0 }, + { X86::SBB32ri8, X86::SBB32mi8, 0 }, + { X86::SBB32rr, X86::SBB32mr, 0 }, + { X86::SBB64ri32, X86::SBB64mi32, 0 }, + { X86::SBB64ri8, X86::SBB64mi8, 0 }, + { X86::SBB64rr, X86::SBB64mr, 0 }, + { X86::SHL16r1, X86::SHL16m1, 0 }, + { X86::SHL16rCL, X86::SHL16mCL, 0 }, + { X86::SHL16ri, X86::SHL16mi, 0 }, + { X86::SHL32r1, X86::SHL32m1, 0 }, + { X86::SHL32rCL, X86::SHL32mCL, 0 }, + { X86::SHL32ri, X86::SHL32mi, 0 }, + { X86::SHL64r1, X86::SHL64m1, 0 }, + { X86::SHL64rCL, X86::SHL64mCL, 0 }, + { X86::SHL64ri, X86::SHL64mi, 0 }, + { X86::SHL8r1, X86::SHL8m1, 0 }, + { X86::SHL8rCL, X86::SHL8mCL, 0 }, + { X86::SHL8ri, X86::SHL8mi, 0 }, + { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 }, + { X86::SHLD16rri8, X86::SHLD16mri8, 0 }, + { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 }, + { X86::SHLD32rri8, X86::SHLD32mri8, 0 }, + { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 }, + { X86::SHLD64rri8, X86::SHLD64mri8, 0 }, + { X86::SHR16r1, X86::SHR16m1, 0 }, + { X86::SHR16rCL, X86::SHR16mCL, 0 }, + { X86::SHR16ri, X86::SHR16mi, 0 }, + { X86::SHR32r1, X86::SHR32m1, 0 }, + { X86::SHR32rCL, X86::SHR32mCL, 0 }, + { X86::SHR32ri, X86::SHR32mi, 0 }, + { X86::SHR64r1, X86::SHR64m1, 0 }, + { X86::SHR64rCL, X86::SHR64mCL, 0 }, + { X86::SHR64ri, X86::SHR64mi, 0 }, + { X86::SHR8r1, X86::SHR8m1, 0 }, + { X86::SHR8rCL, X86::SHR8mCL, 0 }, + { X86::SHR8ri, X86::SHR8mi, 0 }, + { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 }, + { X86::SHRD16rri8, X86::SHRD16mri8, 0 }, + { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 }, + { X86::SHRD32rri8, X86::SHRD32mri8, 0 }, + { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 }, + { X86::SHRD64rri8, X86::SHRD64mri8, 0 }, + { X86::SUB16ri, X86::SUB16mi, 0 }, + { X86::SUB16ri8, X86::SUB16mi8, 0 }, + { X86::SUB16rr, X86::SUB16mr, 0 }, + { X86::SUB32ri, X86::SUB32mi, 0 }, + { X86::SUB32ri8, X86::SUB32mi8, 0 }, + { X86::SUB32rr, X86::SUB32mr, 0 }, + { X86::SUB64ri32, X86::SUB64mi32, 0 }, + { X86::SUB64ri8, X86::SUB64mi8, 0 }, + { X86::SUB64rr, X86::SUB64mr, 0 }, + { X86::SUB8ri, X86::SUB8mi, 0 }, + { X86::SUB8rr, X86::SUB8mr, 0 }, + { X86::XOR16ri, X86::XOR16mi, 0 }, + { X86::XOR16ri8, X86::XOR16mi8, 0 }, + { X86::XOR16rr, X86::XOR16mr, 0 }, + { X86::XOR32ri, X86::XOR32mi, 0 }, + { X86::XOR32ri8, X86::XOR32mi8, 0 }, + { X86::XOR32rr, X86::XOR32mr, 0 }, + { X86::XOR64ri32, X86::XOR64mi32, 0 }, + { X86::XOR64ri8, X86::XOR64mi8, 0 }, + { X86::XOR64rr, X86::XOR64mr, 0 }, + { X86::XOR8ri, X86::XOR8mi, 0 }, + { X86::XOR8rr, X86::XOR8mr, 0 } + }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) { AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, @@ -131,11 +295,746 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } + static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { + { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, + { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, + { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, + { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, + { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, + { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, + { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, + { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, + { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD }, + { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD }, + { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD }, + { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD }, + { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD }, + { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD }, + { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD }, + { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD }, + { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD }, + { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, + { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, + { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE }, + { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, + { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD }, + { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD }, + { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD }, + { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD }, + { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, + { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, + { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, + { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, + { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, + { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, + { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, + { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, + { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE }, + { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE }, + { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE }, + { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE }, + { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE }, + { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE }, + { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE }, + { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE }, + { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD }, + { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD }, + { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD }, + { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, + { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE }, + { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE }, + { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, + { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, + { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, + { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, + { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, + { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, + { X86::SETBr, X86::SETBm, TB_FOLDED_STORE }, + { X86::SETEr, X86::SETEm, TB_FOLDED_STORE }, + { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE }, + { X86::SETGr, X86::SETGm, TB_FOLDED_STORE }, + { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE }, + { X86::SETLr, X86::SETLm, TB_FOLDED_STORE }, + { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE }, + { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE }, + { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE }, + { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE }, + { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, + { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, + { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, + { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, + { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD }, + { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, + { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, + { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, + { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, + + // AVX 128-bit versions of foldable instructions + { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE }, + { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE }, + { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE }, + { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE }, + { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE }, + { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE }, + { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE }, + { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE }, + { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE }, + + // AVX 256-bit foldable instructions + { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE }, + { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, + { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }, + + // AVX-512 foldable instructions + { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE }, + { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE }, + { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE }, + { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE }, + { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE }, + { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE }, + { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE }, + { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE }, + { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE }, + { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE }, + { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE }, + { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE }, + { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE }, + { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE }, + { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE }, + { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE }, + { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE }, + { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE }, + { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE }, + { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE }, + + // AVX-512 foldable instructions (256-bit versions) + { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE }, + { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE }, + { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE }, + { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE }, + { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE }, + { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE }, + { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE }, + { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE }, + + // AVX-512 foldable instructions (128-bit versions) + { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE }, + { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }, + + // F16C foldable instructions + { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE }, + { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE } + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) { AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags); } + static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { + { X86::BSF16rr, X86::BSF16rm, 0 }, + { X86::BSF32rr, X86::BSF32rm, 0 }, + { X86::BSF64rr, X86::BSF64rm, 0 }, + { X86::BSR16rr, X86::BSR16rm, 0 }, + { X86::BSR32rr, X86::BSR32rm, 0 }, + { X86::BSR64rr, X86::BSR64rm, 0 }, + { X86::CMP16rr, X86::CMP16rm, 0 }, + { X86::CMP32rr, X86::CMP32rm, 0 }, + { X86::CMP64rr, X86::CMP64rm, 0 }, + { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, + { X86::IMUL16rri, X86::IMUL16rmi, 0 }, + { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, + { X86::IMUL32rri, X86::IMUL32rmi, 0 }, + { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, + { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, + { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, + { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE }, + { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE }, + { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE }, + { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE }, + { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE }, + { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE }, + { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE }, + { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 }, + { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 }, + { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 }, + { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE }, + { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, + { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE }, + { X86::MOV16rr, X86::MOV16rm, 0 }, + { X86::MOV32rr, X86::MOV32rm, 0 }, + { X86::MOV64rr, X86::MOV64rm, 0 }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, + { X86::MOV8rr, X86::MOV8rm, 0 }, + { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 }, + { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 }, + { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, + { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 }, + { X86::MOVDQUrr, X86::MOVDQUrm, 0 }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 }, + { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, + { X86::MOVUPDrr, X86::MOVUPDrm, 0 }, + { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, + { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE }, + { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, + { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 }, + { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 }, + { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 }, + { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 }, + { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 }, + { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 }, + { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 }, + { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 }, + { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE }, + { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE }, + { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE }, + { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE }, + { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE }, + { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE }, + { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE }, + { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE }, + { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE }, + { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE }, + { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE }, + { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE }, + { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, + { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, + { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, + { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 }, + { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 }, + { X86::RCPSSr, X86::RCPSSm, 0 }, + { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE }, + { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 }, + { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 }, + { X86::ROUNDSDr, X86::ROUNDSDm, 0 }, + { X86::ROUNDSSr, X86::ROUNDSSm, 0 }, + { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 }, + { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE }, + { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, + { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, + { X86::SQRTSDr, X86::SQRTSDm, 0 }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE }, + { X86::SQRTSSr, X86::SQRTSSm, 0 }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE }, + { X86::TEST16rr, X86::TEST16rm, 0 }, + { X86::TEST32rr, X86::TEST32rm, 0 }, + { X86::TEST64rr, X86::TEST64rm, 0 }, + { X86::TEST8rr, X86::TEST8rm, 0 }, + // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 + { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, + { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, + + // MMX version of foldable instructions + { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 }, + { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 }, + { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 }, + { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 }, + { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 }, + { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 }, + { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 }, + { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 }, + { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 }, + { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 }, + + // 3DNow! version of foldable instructions + { X86::PF2IDrr, X86::PF2IDrm, 0 }, + { X86::PF2IWrr, X86::PF2IWrm, 0 }, + { X86::PFRCPrr, X86::PFRCPrm, 0 }, + { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 }, + { X86::PI2FDrr, X86::PI2FDrm, 0 }, + { X86::PI2FWrr, X86::PI2FWrm, 0 }, + { X86::PSWAPDrr, X86::PSWAPDrm, 0 }, + + // AVX 128-bit versions of foldable instructions + { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, TB_NO_REVERSE }, + { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, TB_NO_REVERSE }, + { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE }, + { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE }, + { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, + { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE }, + { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, + { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE }, + { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, + { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE }, + { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, + { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE }, + { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE }, + { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE }, + { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE }, + { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE }, + { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE }, + { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 }, + { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 }, + { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 }, + { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 }, + { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 }, + { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, + { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, + { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 }, + { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 }, + { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 }, + { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE }, + { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 }, + { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 }, + { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, + { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 }, + { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 }, + { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 }, + { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, + { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, + { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm, TB_NO_REVERSE }, + { X86::VPABSBrr, X86::VPABSBrm, 0 }, + { X86::VPABSDrr, X86::VPABSDrm, 0 }, + { X86::VPABSWrr, X86::VPABSWrm, 0 }, + { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 }, + { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 }, + { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 }, + { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 }, + { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 }, + { X86::VPERMILPDri, X86::VPERMILPDmi, 0 }, + { X86::VPERMILPSri, X86::VPERMILPSmi, 0 }, + { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE }, + { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE }, + { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE }, + { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE }, + { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE }, + { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE }, + { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE }, + { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE }, + { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE }, + { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE }, + { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE }, + { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE }, + { X86::VPSHUFDri, X86::VPSHUFDmi, 0 }, + { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 }, + { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 }, + { X86::VPTESTrr, X86::VPTESTrm, 0 }, + { X86::VRCPPSr, X86::VRCPPSm, 0 }, + { X86::VROUNDPDr, X86::VROUNDPDm, 0 }, + { X86::VROUNDPSr, X86::VROUNDPSm, 0 }, + { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, + { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, + { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, + { X86::VTESTPDrr, X86::VTESTPDrm, 0 }, + { X86::VTESTPSrr, X86::VTESTPSrm, 0 }, + { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, + { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, + + // AVX 256-bit foldable instructions + { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE }, + { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 }, + { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 }, + { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 }, + { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 }, + { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE }, + { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 }, + { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 }, + { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, + { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, + { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 }, + { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 }, + { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 }, + { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 }, + { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, + { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 }, + { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, + { X86::VPTESTYrr, X86::VPTESTYrm, 0 }, + { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, + { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 }, + { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 }, + { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, + { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, + { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, + { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 }, + { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 }, + + // AVX2 foldable instructions + + // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the + // VBROADCASTS{SD}rm memory instructions were available from AVX1. + // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction + // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions + // so they don't need an equivalent limitation. + { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, + { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, + { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, + { X86::VPABSBYrr, X86::VPABSBYrm, 0 }, + { X86::VPABSDYrr, X86::VPABSDYrm, 0 }, + { X86::VPABSWYrr, X86::VPABSWYrm, 0 }, + { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE }, + { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE }, + { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE }, + { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE }, + { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE }, + { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE }, + { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE }, + { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE }, + { X86::VPERMPDYri, X86::VPERMPDYmi, 0 }, + { X86::VPERMQYri, X86::VPERMQYmi, 0 }, + { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE }, + { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE }, + { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 }, + { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 }, + { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 }, + { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE }, + { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE }, + { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE }, + { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 }, + { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 }, + { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 }, + { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE }, + { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 }, + { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 }, + { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 }, + + // XOP foldable instructions + { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 }, + { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 }, + { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 }, + { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 }, + { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 }, + { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 }, + { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 }, + { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 }, + { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 }, + { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 }, + { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 }, + { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 }, + { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 }, + { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 }, + { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 }, + { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 }, + { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 }, + { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 }, + { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 }, + { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 }, + { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 }, + { X86::VPROTBri, X86::VPROTBmi, 0 }, + { X86::VPROTBrr, X86::VPROTBmr, 0 }, + { X86::VPROTDri, X86::VPROTDmi, 0 }, + { X86::VPROTDrr, X86::VPROTDmr, 0 }, + { X86::VPROTQri, X86::VPROTQmi, 0 }, + { X86::VPROTQrr, X86::VPROTQmr, 0 }, + { X86::VPROTWri, X86::VPROTWmi, 0 }, + { X86::VPROTWrr, X86::VPROTWmr, 0 }, + { X86::VPSHABrr, X86::VPSHABmr, 0 }, + { X86::VPSHADrr, X86::VPSHADmr, 0 }, + { X86::VPSHAQrr, X86::VPSHAQmr, 0 }, + { X86::VPSHAWrr, X86::VPSHAWmr, 0 }, + { X86::VPSHLBrr, X86::VPSHLBmr, 0 }, + { X86::VPSHLDrr, X86::VPSHLDmr, 0 }, + { X86::VPSHLQrr, X86::VPSHLQmr, 0 }, + { X86::VPSHLWrr, X86::VPSHLWmr, 0 }, + + // LWP foldable instructions + { X86::LWPINS32rri, X86::LWPINS32rmi, 0 }, + { X86::LWPINS64rri, X86::LWPINS64rmi, 0 }, + { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 }, + { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 }, + + // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions + { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, + { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, + { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 }, + { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 }, + { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 }, + { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 }, + { X86::BLCI32rr, X86::BLCI32rm, 0 }, + { X86::BLCI64rr, X86::BLCI64rm, 0 }, + { X86::BLCIC32rr, X86::BLCIC32rm, 0 }, + { X86::BLCIC64rr, X86::BLCIC64rm, 0 }, + { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 }, + { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 }, + { X86::BLCS32rr, X86::BLCS32rm, 0 }, + { X86::BLCS64rr, X86::BLCS64rm, 0 }, + { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 }, + { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 }, + { X86::BLSI32rr, X86::BLSI32rm, 0 }, + { X86::BLSI64rr, X86::BLSI64rm, 0 }, + { X86::BLSIC32rr, X86::BLSIC32rm, 0 }, + { X86::BLSIC64rr, X86::BLSIC64rm, 0 }, + { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 }, + { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 }, + { X86::BLSR32rr, X86::BLSR32rm, 0 }, + { X86::BLSR64rr, X86::BLSR64rm, 0 }, + { X86::BZHI32rr, X86::BZHI32rm, 0 }, + { X86::BZHI64rr, X86::BZHI64rm, 0 }, + { X86::LZCNT16rr, X86::LZCNT16rm, 0 }, + { X86::LZCNT32rr, X86::LZCNT32rm, 0 }, + { X86::LZCNT64rr, X86::LZCNT64rm, 0 }, + { X86::POPCNT16rr, X86::POPCNT16rm, 0 }, + { X86::POPCNT32rr, X86::POPCNT32rm, 0 }, + { X86::POPCNT64rr, X86::POPCNT64rm, 0 }, + { X86::RORX32ri, X86::RORX32mi, 0 }, + { X86::RORX64ri, X86::RORX64mi, 0 }, + { X86::SARX32rr, X86::SARX32rm, 0 }, + { X86::SARX64rr, X86::SARX64rm, 0 }, + { X86::SHRX32rr, X86::SHRX32rm, 0 }, + { X86::SHRX64rr, X86::SHRX64rm, 0 }, + { X86::SHLX32rr, X86::SHLX32rm, 0 }, + { X86::SHLX64rr, X86::SHLX64rm, 0 }, + { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 }, + { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 }, + { X86::TZCNT16rr, X86::TZCNT16rm, 0 }, + { X86::TZCNT32rr, X86::TZCNT32rm, 0 }, + { X86::TZCNT64rr, X86::TZCNT64rm, 0 }, + { X86::TZMSK32rr, X86::TZMSK32rm, 0 }, + { X86::TZMSK64rr, X86::TZMSK64rm, 0 }, + + // AVX-512 foldable instructions + { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, + { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, + { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, + { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 }, + { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 }, + { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, + { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 }, + { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 }, + { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 }, + { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 }, + { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 }, + { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 }, + { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 }, + { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 }, + { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, + { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE }, + { X86::VPABSBZrr, X86::VPABSBZrm, 0 }, + { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, + { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, + { X86::VPABSWZrr, X86::VPABSWZrm, 0 }, + { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 }, + { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 }, + { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, + { X86::VPERMQZri, X86::VPERMQZmi, 0 }, + { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 }, + { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE }, + { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 }, + { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 }, + { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 }, + { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 }, + { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 }, + { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE }, + { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 }, + { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 }, + { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 }, + { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 }, + { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 }, + { X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 }, + { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 }, + { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 }, + { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 }, + { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 }, + { X86::VPSLLDZri, X86::VPSLLDZmi, 0 }, + { X86::VPSLLQZri, X86::VPSLLQZmi, 0 }, + { X86::VPSLLWZri, X86::VPSLLWZmi, 0 }, + { X86::VPSRADZri, X86::VPSRADZmi, 0 }, + { X86::VPSRAQZri, X86::VPSRAQZmi, 0 }, + { X86::VPSRAWZri, X86::VPSRAWZmi, 0 }, + { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 }, + { X86::VPSRLDZri, X86::VPSRLDZmi, 0 }, + { X86::VPSRLQZri, X86::VPSRLQZmi, 0 }, + { X86::VPSRLWZri, X86::VPSRLWZmi, 0 }, + + // AVX-512 foldable instructions (256-bit versions) + { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, + { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, + { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, + { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, + { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 }, + { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 }, + { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 }, + { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 }, + { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 }, + { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 }, + { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 }, + { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 }, + { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 }, + { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 }, + { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 }, + { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 }, + { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 }, + { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 }, + { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 }, + { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 }, + { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 }, + { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 }, + { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 }, + { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 }, + { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 }, + { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE }, + { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 }, + { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 }, + { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 }, + { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 }, + { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 }, + { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 }, + { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 }, + { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 }, + { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 }, + { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 }, + { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 }, + { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 }, + { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 }, + { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 }, + + // AVX-512 foldable instructions (128-bit versions) + { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, + { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, + { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, + { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, + { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 }, + { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 }, + { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 }, + { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 }, + { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 }, + { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 }, + { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 }, + { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 }, + { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 }, + { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 }, + { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 }, + { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 }, + { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 }, + { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE }, + { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE }, + { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 }, + { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 }, + { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 }, + { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 }, + { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 }, + { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 }, + { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 }, + { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 }, + { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 }, + { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 }, + { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 }, + { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 }, + { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 }, + { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 }, + + // F16C foldable instructions + { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 }, + { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 }, + + // AES foldable instructions + { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, + { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 }, + { X86::VAESIMCrr, X86::VAESIMCrm, 0 }, + { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 } + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) { AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, @@ -143,6 +1042,1396 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } + static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { + { X86::ADC32rr, X86::ADC32rm, 0 }, + { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE }, + { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE }, + { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE }, + { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 }, + { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 }, + { X86::ADDSDrr, X86::ADDSDrm, 0 }, + { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE }, + { X86::ADDSSrr, X86::ADDSSrm, 0 }, + { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 }, + { X86::AND16rr, X86::AND16rm, 0 }, + { X86::AND32rr, X86::AND32rm, 0 }, + { X86::AND64rr, X86::AND64rm, 0 }, + { X86::AND8rr, X86::AND8rm, 0 }, + { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 }, + { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 }, + { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 }, + { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 }, + { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 }, + { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, + { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, + { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, + { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, + { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, + { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, + { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, + { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, + { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, + { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, + { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, + { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, + { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, + { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, + { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, + { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, + { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, + { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, + { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, + { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, + { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, + { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, + { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, + { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, + { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, + { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, + { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, + { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, + { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, + { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, + { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, + { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, + { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, + { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, + { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, + { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, + { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, + { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, + { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, + { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, + { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, + { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, + { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, + { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, + { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, + { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, + { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, + { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, + { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, + { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, + { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, + { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::CRC32r32r32, X86::CRC32r32m32, 0 }, + { X86::CRC32r64r64, X86::CRC32r64m64, 0 }, + { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, + { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, + { X86::DIVSDrr, X86::DIVSDrm, 0 }, + { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE }, + { X86::DIVSSrr, X86::DIVSSrm, 0 }, + { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE }, + { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 }, + { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 }, + { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 }, + { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 }, + { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 }, + { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 }, + { X86::IMUL16rr, X86::IMUL16rm, 0 }, + { X86::IMUL32rr, X86::IMUL32rm, 0 }, + { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE }, + { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, + { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 }, + { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, + { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 }, + { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXCSDrr, X86::MAXCSDrm, 0 }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE }, + { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXCSSrr, X86::MAXCSSrm, 0 }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE }, + { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, + { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 }, + { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, + { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 }, + { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINCSDrr, X86::MINCSDrm, 0 }, + { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE }, + { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINCSSrr, X86::MINCSSrm, 0 }, + { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE }, + { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE }, + { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, + { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, + { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, + { X86::MULSDrr, X86::MULSDrm, 0 }, + { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE }, + { X86::MULSSrr, X86::MULSSrm, 0 }, + { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE }, + { X86::OR16rr, X86::OR16rm, 0 }, + { X86::OR32rr, X86::OR32rm, 0 }, + { X86::OR64rr, X86::OR64rm, 0 }, + { X86::OR8rr, X86::OR8rm, 0 }, + { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 }, + { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, + { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, + { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, + { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 }, + { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, + { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, + { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, + { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, + { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, + { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, + { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 }, + { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 }, + { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, + { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 }, + { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, + { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, + { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, + { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, + { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 }, + { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 }, + { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 }, + { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, + { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, + { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 }, + { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 }, + { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, + { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, + { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, + { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, + { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 }, + { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 }, + { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, + { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 }, + { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, + { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 }, + { X86::PINSRBrr, X86::PINSRBrm, 0 }, + { X86::PINSRDrr, X86::PINSRDrm, 0 }, + { X86::PINSRQrr, X86::PINSRQrm, 0 }, + { X86::PINSRWrri, X86::PINSRWrmi, 0 }, + { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 }, + { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, + { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, + { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, + { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, + { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, + { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, + { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, + { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, + { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, + { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, + { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, + { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, + { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, + { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 }, + { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, + { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, + { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, + { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 }, + { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, + { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, + { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, + { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 }, + { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, + { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, + { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, + { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, + { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, + { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, + { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 }, + { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 }, + { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 }, + { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 }, + { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 }, + { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 }, + { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 }, + { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 }, + { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 }, + { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 }, + { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 }, + { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 }, + { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, + { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, + { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE }, + { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE }, + { X86::SBB32rr, X86::SBB32rm, 0 }, + { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, + { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 }, + { X86::SUB16rr, X86::SUB16rm, 0 }, + { X86::SUB32rr, X86::SUB32rm, 0 }, + { X86::SUB64rr, X86::SUB64rm, 0 }, + { X86::SUB8rr, X86::SUB8rm, 0 }, + { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 }, + { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 }, + { X86::SUBSDrr, X86::SUBSDrm, 0 }, + { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE }, + { X86::SUBSSrr, X86::SUBSSrm, 0 }, + { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE }, + // FIXME: TEST*rr -> swapped operand of TEST*mr. + { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 }, + { X86::XOR16rr, X86::XOR16rm, 0 }, + { X86::XOR32rr, X86::XOR32rm, 0 }, + { X86::XOR64rr, X86::XOR64rm, 0 }, + { X86::XOR8rr, X86::XOR8rm, 0 }, + { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 }, + { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 }, + + // MMX version of foldable instructions + { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 }, + { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 }, + { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 }, + { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 }, + { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 }, + { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 }, + { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 }, + { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 }, + { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 }, + { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 }, + { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 }, + { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 }, + { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 }, + { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 }, + { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 }, + { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 }, + { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 }, + { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 }, + { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 }, + { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 }, + { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 }, + { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 }, + { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 }, + { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 }, + { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 }, + { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 }, + { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 }, + { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 }, + { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 }, + { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 }, + { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 }, + { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 }, + { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 }, + { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 }, + { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 }, + { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 }, + { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 }, + { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 }, + { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 }, + { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 }, + { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 }, + { X86::MMX_PORirr, X86::MMX_PORirm, 0 }, + { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 }, + { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 }, + { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 }, + { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 }, + { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 }, + { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 }, + { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 }, + { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 }, + { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 }, + { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 }, + { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 }, + { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 }, + { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 }, + { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 }, + { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 }, + { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 }, + { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 }, + { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 }, + { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 }, + { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 }, + { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 }, + { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 }, + { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 }, + { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 }, + { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 }, + { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 }, + { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 }, + { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 }, + + // 3DNow! version of foldable instructions + { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 }, + { X86::PFACCrr, X86::PFACCrm, 0 }, + { X86::PFADDrr, X86::PFADDrm, 0 }, + { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 }, + { X86::PFCMPGErr, X86::PFCMPGErm, 0 }, + { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 }, + { X86::PFMAXrr, X86::PFMAXrm, 0 }, + { X86::PFMINrr, X86::PFMINrm, 0 }, + { X86::PFMULrr, X86::PFMULrm, 0 }, + { X86::PFNACCrr, X86::PFNACCrm, 0 }, + { X86::PFPNACCrr, X86::PFPNACCrm, 0 }, + { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 }, + { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 }, + { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 }, + { X86::PFSUBrr, X86::PFSUBrm, 0 }, + { X86::PFSUBRrr, X86::PFSUBRrm, 0 }, + { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, + + // AVX 128-bit versions of foldable instructions + { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, + { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, + { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, + { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 }, + { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 }, + { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, + { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, + { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, + { X86::VADDPDrr, X86::VADDPDrm, 0 }, + { X86::VADDPSrr, X86::VADDPSrm, 0 }, + { X86::VADDSDrr, X86::VADDSDrm, 0 }, + { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE }, + { X86::VADDSSrr, X86::VADDSSrm, 0 }, + { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE }, + { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 }, + { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 }, + { X86::VANDNPDrr, X86::VANDNPDrm, 0 }, + { X86::VANDNPSrr, X86::VANDNPSrm, 0 }, + { X86::VANDPDrr, X86::VANDPDrm, 0 }, + { X86::VANDPSrr, X86::VANDPSrm, 0 }, + { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 }, + { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 }, + { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 }, + { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 }, + { X86::VCMPPDrri, X86::VCMPPDrmi, 0 }, + { X86::VCMPPSrri, X86::VCMPPSrmi, 0 }, + { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, + { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, + { X86::VDIVPDrr, X86::VDIVPDrm, 0 }, + { X86::VDIVPSrr, X86::VDIVPSrm, 0 }, + { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, + { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE }, + { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, + { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE }, + { X86::VDPPDrri, X86::VDPPDrmi, 0 }, + { X86::VDPPSrri, X86::VDPPSrmi, 0 }, + { X86::VHADDPDrr, X86::VHADDPDrm, 0 }, + { X86::VHADDPSrr, X86::VHADDPSrm, 0 }, + { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 }, + { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 }, + { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE }, + { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE }, + { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 }, + { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 }, + { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 }, + { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 }, + { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, + { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, + { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, + { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE }, + { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, + { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE }, + { X86::VMINCPDrr, X86::VMINCPDrm, 0 }, + { X86::VMINCPSrr, X86::VMINCPSrm, 0 }, + { X86::VMINCSDrr, X86::VMINCSDrm, 0 }, + { X86::VMINCSSrr, X86::VMINCSSrm, 0 }, + { X86::VMINPDrr, X86::VMINPDrm, 0 }, + { X86::VMINPSrr, X86::VMINPSrm, 0 }, + { X86::VMINSDrr, X86::VMINSDrm, 0 }, + { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE }, + { X86::VMINSSrr, X86::VMINSSrm, 0 }, + { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE }, + { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE }, + { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, + { X86::VMULPDrr, X86::VMULPDrm, 0 }, + { X86::VMULPSrr, X86::VMULPSrm, 0 }, + { X86::VMULSDrr, X86::VMULSDrm, 0 }, + { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE }, + { X86::VMULSSrr, X86::VMULSSrm, 0 }, + { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE }, + { X86::VORPDrr, X86::VORPDrm, 0 }, + { X86::VORPSrr, X86::VORPSrm, 0 }, + { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 }, + { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 }, + { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 }, + { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 }, + { X86::VPADDBrr, X86::VPADDBrm, 0 }, + { X86::VPADDDrr, X86::VPADDDrm, 0 }, + { X86::VPADDQrr, X86::VPADDQrm, 0 }, + { X86::VPADDSBrr, X86::VPADDSBrm, 0 }, + { X86::VPADDSWrr, X86::VPADDSWrm, 0 }, + { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 }, + { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 }, + { X86::VPADDWrr, X86::VPADDWrm, 0 }, + { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 }, + { X86::VPANDNrr, X86::VPANDNrm, 0 }, + { X86::VPANDrr, X86::VPANDrm, 0 }, + { X86::VPAVGBrr, X86::VPAVGBrm, 0 }, + { X86::VPAVGWrr, X86::VPAVGWrm, 0 }, + { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 }, + { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 }, + { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 }, + { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 }, + { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 }, + { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 }, + { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 }, + { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 }, + { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 }, + { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 }, + { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 }, + { X86::VPHADDDrr, X86::VPHADDDrm, 0 }, + { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 }, + { X86::VPHADDWrr, X86::VPHADDWrm, 0 }, + { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 }, + { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 }, + { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 }, + { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 }, + { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 }, + { X86::VPINSRBrr, X86::VPINSRBrm, 0 }, + { X86::VPINSRDrr, X86::VPINSRDrm, 0 }, + { X86::VPINSRQrr, X86::VPINSRQrm, 0 }, + { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, + { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 }, + { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, + { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, + { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, + { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, + { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, + { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, + { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, + { X86::VPMINSBrr, X86::VPMINSBrm, 0 }, + { X86::VPMINSDrr, X86::VPMINSDrm, 0 }, + { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, + { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, + { X86::VPMINUDrr, X86::VPMINUDrm, 0 }, + { X86::VPMINUWrr, X86::VPMINUWrm, 0 }, + { X86::VPMULDQrr, X86::VPMULDQrm, 0 }, + { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 }, + { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 }, + { X86::VPMULHWrr, X86::VPMULHWrm, 0 }, + { X86::VPMULLDrr, X86::VPMULLDrm, 0 }, + { X86::VPMULLWrr, X86::VPMULLWrm, 0 }, + { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 }, + { X86::VPORrr, X86::VPORrm, 0 }, + { X86::VPSADBWrr, X86::VPSADBWrm, 0 }, + { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 }, + { X86::VPSIGNBrr128, X86::VPSIGNBrm128, 0 }, + { X86::VPSIGNWrr128, X86::VPSIGNWrm128, 0 }, + { X86::VPSIGNDrr128, X86::VPSIGNDrm128, 0 }, + { X86::VPSLLDrr, X86::VPSLLDrm, 0 }, + { X86::VPSLLQrr, X86::VPSLLQrm, 0 }, + { X86::VPSLLWrr, X86::VPSLLWrm, 0 }, + { X86::VPSRADrr, X86::VPSRADrm, 0 }, + { X86::VPSRAWrr, X86::VPSRAWrm, 0 }, + { X86::VPSRLDrr, X86::VPSRLDrm, 0 }, + { X86::VPSRLQrr, X86::VPSRLQrm, 0 }, + { X86::VPSRLWrr, X86::VPSRLWrm, 0 }, + { X86::VPSUBBrr, X86::VPSUBBrm, 0 }, + { X86::VPSUBDrr, X86::VPSUBDrm, 0 }, + { X86::VPSUBQrr, X86::VPSUBQrm, 0 }, + { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 }, + { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 }, + { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 }, + { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 }, + { X86::VPSUBWrr, X86::VPSUBWrm, 0 }, + { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 }, + { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 }, + { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 }, + { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 }, + { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 }, + { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 }, + { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, + { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, + { X86::VPXORrr, X86::VPXORrm, 0 }, + { X86::VRCPSSr, X86::VRCPSSm, 0 }, + { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE }, + { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, + { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE }, + { X86::VROUNDSDr, X86::VROUNDSDm, 0 }, + { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE }, + { X86::VROUNDSSr, X86::VROUNDSSm, 0 }, + { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE }, + { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, + { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, + { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, + { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE }, + { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, + { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE }, + { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, + { X86::VSUBPSrr, X86::VSUBPSrm, 0 }, + { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, + { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE }, + { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, + { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE }, + { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 }, + { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 }, + { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 }, + { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 }, + { X86::VXORPDrr, X86::VXORPDrm, 0 }, + { X86::VXORPSrr, X86::VXORPSrm, 0 }, + + // AVX 256-bit foldable instructions + { X86::VADDPDYrr, X86::VADDPDYrm, 0 }, + { X86::VADDPSYrr, X86::VADDPSYrm, 0 }, + { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 }, + { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 }, + { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 }, + { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 }, + { X86::VANDPDYrr, X86::VANDPDYrm, 0 }, + { X86::VANDPSYrr, X86::VANDPSYrm, 0 }, + { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 }, + { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 }, + { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 }, + { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 }, + { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 }, + { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 }, + { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 }, + { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 }, + { X86::VDPPSYrri, X86::VDPPSYrmi, 0 }, + { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 }, + { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 }, + { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 }, + { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, + { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, + { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 }, + { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 }, + { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, + { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, + { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 }, + { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 }, + { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, + { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, + { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, + { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, + { X86::VORPDYrr, X86::VORPDYrm, 0 }, + { X86::VORPSYrr, X86::VORPSYrm, 0 }, + { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 }, + { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 }, + { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 }, + { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 }, + { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 }, + { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 }, + { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 }, + { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 }, + { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 }, + { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 }, + { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 }, + { X86::VXORPDYrr, X86::VXORPDYrm, 0 }, + { X86::VXORPSYrr, X86::VXORPSYrm, 0 }, + + // AVX2 foldable instructions + { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 }, + { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 }, + { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 }, + { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 }, + { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 }, + { X86::VPADDBYrr, X86::VPADDBYrm, 0 }, + { X86::VPADDDYrr, X86::VPADDDYrm, 0 }, + { X86::VPADDQYrr, X86::VPADDQYrm, 0 }, + { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 }, + { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 }, + { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 }, + { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 }, + { X86::VPADDWYrr, X86::VPADDWYrm, 0 }, + { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 }, + { X86::VPANDNYrr, X86::VPANDNYrm, 0 }, + { X86::VPANDYrr, X86::VPANDYrm, 0 }, + { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 }, + { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 }, + { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 }, + { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 }, + { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 }, + { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 }, + { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 }, + { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 }, + { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 }, + { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 }, + { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 }, + { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 }, + { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 }, + { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 }, + { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 }, + { X86::VPERMDYrr, X86::VPERMDYrm, 0 }, + { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 }, + { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 }, + { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 }, + { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 }, + { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 }, + { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 }, + { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, + { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 }, + { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, + { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 }, + { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, + { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, + { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, + { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, + { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, + { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 }, + { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, + { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 }, + { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 }, + { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 }, + { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 }, + { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 }, + { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 }, + { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 }, + { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 }, + { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 }, + { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 }, + { X86::VPORYrr, X86::VPORYrm, 0 }, + { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 }, + { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 }, + { X86::VPSIGNBYrr256, X86::VPSIGNBYrm256, 0 }, + { X86::VPSIGNWYrr256, X86::VPSIGNWYrm256, 0 }, + { X86::VPSIGNDYrr256, X86::VPSIGNDYrm256, 0 }, + { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 }, + { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 }, + { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 }, + { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 }, + { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 }, + { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 }, + { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 }, + { X86::VPSRADYrr, X86::VPSRADYrm, 0 }, + { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 }, + { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 }, + { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 }, + { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 }, + { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 }, + { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 }, + { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 }, + { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 }, + { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 }, + { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 }, + { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 }, + { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 }, + { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 }, + { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 }, + { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 }, + { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 }, + { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 }, + { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 }, + { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 }, + { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 }, + { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 }, + { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 }, + { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 }, + { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 }, + { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 }, + { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 }, + { X86::VPXORYrr, X86::VPXORYrm, 0 }, + + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE }, + { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE }, + { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE }, + { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE }, + { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE }, + { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE }, + { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE }, + { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE }, + { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE }, + + // XOP foldable instructions + { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 }, + { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 }, + { X86::VPCOMBri, X86::VPCOMBmi, 0 }, + { X86::VPCOMDri, X86::VPCOMDmi, 0 }, + { X86::VPCOMQri, X86::VPCOMQmi, 0 }, + { X86::VPCOMWri, X86::VPCOMWmi, 0 }, + { X86::VPCOMUBri, X86::VPCOMUBmi, 0 }, + { X86::VPCOMUDri, X86::VPCOMUDmi, 0 }, + { X86::VPCOMUQri, X86::VPCOMUQmi, 0 }, + { X86::VPCOMUWri, X86::VPCOMUWmi, 0 }, + { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 }, + { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 }, + { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 }, + { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 }, + { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 }, + { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 }, + { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 }, + { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 }, + { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 }, + { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 }, + { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 }, + { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 }, + { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 }, + { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 }, + { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 }, + { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 }, + { X86::VPPERMrrr, X86::VPPERMrmr, 0 }, + { X86::VPROTBrr, X86::VPROTBrm, 0 }, + { X86::VPROTDrr, X86::VPROTDrm, 0 }, + { X86::VPROTQrr, X86::VPROTQrm, 0 }, + { X86::VPROTWrr, X86::VPROTWrm, 0 }, + { X86::VPSHABrr, X86::VPSHABrm, 0 }, + { X86::VPSHADrr, X86::VPSHADrm, 0 }, + { X86::VPSHAQrr, X86::VPSHAQrm, 0 }, + { X86::VPSHAWrr, X86::VPSHAWrm, 0 }, + { X86::VPSHLBrr, X86::VPSHLBrm, 0 }, + { X86::VPSHLDrr, X86::VPSHLDrm, 0 }, + { X86::VPSHLQrr, X86::VPSHLQrm, 0 }, + { X86::VPSHLWrr, X86::VPSHLWrm, 0 }, + + // BMI/BMI2 foldable instructions + { X86::ANDN32rr, X86::ANDN32rm, 0 }, + { X86::ANDN64rr, X86::ANDN64rm, 0 }, + { X86::MULX32rr, X86::MULX32rm, 0 }, + { X86::MULX64rr, X86::MULX64rm, 0 }, + { X86::PDEP32rr, X86::PDEP32rm, 0 }, + { X86::PDEP64rr, X86::PDEP64rm, 0 }, + { X86::PEXT32rr, X86::PEXT32rm, 0 }, + { X86::PEXT64rr, X86::PEXT64rm, 0 }, + + // ADX foldable instructions + { X86::ADCX32rr, X86::ADCX32rm, 0 }, + { X86::ADCX64rr, X86::ADCX64rm, 0 }, + { X86::ADOX32rr, X86::ADOX32rm, 0 }, + { X86::ADOX64rr, X86::ADOX64rm, 0 }, + + // AVX-512 foldable instructions + { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, + { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, + { X86::VADDSDZrr, X86::VADDSDZrm, 0 }, + { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE }, + { X86::VADDSSZrr, X86::VADDSSZrm, 0 }, + { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE }, + { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 }, + { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 }, + { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 }, + { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 }, + { X86::VANDPDZrr, X86::VANDPDZrm, 0 }, + { X86::VANDPSZrr, X86::VANDPSZrm, 0 }, + { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 }, + { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 }, + { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 }, + { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE }, + { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 }, + { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE }, + { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 }, + { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 }, + { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 }, + { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE }, + { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 }, + { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE }, + { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 }, + { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 }, + { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 }, + { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 }, + { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 }, + { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 }, + { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 }, + { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 }, + { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 }, + { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 }, + { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 }, + { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 }, + { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 }, + { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 }, + { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 }, + { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE }, + { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 }, + { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE }, + { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 }, + { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 }, + { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 }, + { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 }, + { X86::VMINPDZrr, X86::VMINPDZrm, 0 }, + { X86::VMINPSZrr, X86::VMINPSZrm, 0 }, + { X86::VMINSDZrr, X86::VMINSDZrm, 0 }, + { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE }, + { X86::VMINSSZrr, X86::VMINSSZrm, 0 }, + { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE }, + { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE }, + { X86::VMULPDZrr, X86::VMULPDZrm, 0 }, + { X86::VMULPSZrr, X86::VMULPSZrm, 0 }, + { X86::VMULSDZrr, X86::VMULSDZrm, 0 }, + { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE }, + { X86::VMULSSZrr, X86::VMULSSZrm, 0 }, + { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE }, + { X86::VORPDZrr, X86::VORPDZrm, 0 }, + { X86::VORPSZrr, X86::VORPSZrm, 0 }, + { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 }, + { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 }, + { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 }, + { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 }, + { X86::VPADDBZrr, X86::VPADDBZrm, 0 }, + { X86::VPADDDZrr, X86::VPADDDZrm, 0 }, + { X86::VPADDQZrr, X86::VPADDQZrm, 0 }, + { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 }, + { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 }, + { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 }, + { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 }, + { X86::VPADDWZrr, X86::VPADDWZrm, 0 }, + { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 }, + { X86::VPANDDZrr, X86::VPANDDZrm, 0 }, + { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 }, + { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 }, + { X86::VPANDQZrr, X86::VPANDQZrm, 0 }, + { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 }, + { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 }, + { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 }, + { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 }, + { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 }, + { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 }, + { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 }, + { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 }, + { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 }, + { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 }, + { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 }, + { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 }, + { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 }, + { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 }, + { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 }, + { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 }, + { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 }, + { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 }, + { X86::VPERMBZrr, X86::VPERMBZrm, 0 }, + { X86::VPERMDZrr, X86::VPERMDZrm, 0 }, + { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 }, + { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 }, + { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 }, + { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, + { X86::VPERMQZrr, X86::VPERMQZrm, 0 }, + { X86::VPERMWZrr, X86::VPERMWZrm, 0 }, + { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 }, + { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 }, + { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 }, + { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 }, + { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 }, + { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 }, + { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 }, + { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 }, + { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 }, + { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 }, + { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 }, + { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 }, + { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 }, + { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 }, + { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 }, + { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 }, + { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 }, + { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 }, + { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 }, + { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 }, + { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 }, + { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 }, + { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 }, + { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 }, + { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 }, + { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 }, + { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, + { X86::VPORDZrr, X86::VPORDZrm, 0 }, + { X86::VPORQZrr, X86::VPORQZrm, 0 }, + { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 }, + { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 }, + { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 }, + { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 }, + { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 }, + { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 }, + { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 }, + { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 }, + { X86::VPSRADZrr, X86::VPSRADZrm, 0 }, + { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 }, + { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 }, + { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 }, + { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 }, + { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 }, + { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 }, + { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 }, + { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 }, + { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 }, + { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 }, + { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 }, + { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 }, + { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 }, + { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 }, + { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 }, + { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 }, + { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 }, + { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 }, + { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 }, + { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 }, + { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 }, + { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 }, + { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 }, + { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 }, + { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 }, + { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 }, + { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 }, + { X86::VPXORDZrr, X86::VPXORDZrm, 0 }, + { X86::VPXORQZrr, X86::VPXORQZrm, 0 }, + { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, + { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, + { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 }, + { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 }, + { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 }, + { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE }, + { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 }, + { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE }, + { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 }, + { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 }, + { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 }, + { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 }, + { X86::VXORPDZrr, X86::VXORPDZrm, 0 }, + { X86::VXORPSZrr, X86::VXORPSZrm, 0 }, + + // AVX-512{F,VL} foldable instructions + { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 }, + { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 }, + { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 }, + { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 }, + { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 }, + { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 }, + { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 }, + { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 }, + { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 }, + { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 }, + { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 }, + { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 }, + { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 }, + { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 }, + { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 }, + { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 }, + { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 }, + { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 }, + { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 }, + { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 }, + { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 }, + { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 }, + { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 }, + { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 }, + { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm, 0 }, + { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm, 0 }, + { X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm, 0 }, + { X86::VINSERTI64x2Z256rr,X86::VINSERTI64x2Z256rm, 0 }, + { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 }, + { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 }, + { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 }, + { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 }, + { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 }, + { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 }, + { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 }, + { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 }, + { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 }, + { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 }, + { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 }, + { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 }, + { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 }, + { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 }, + { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 }, + { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 }, + { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 }, + { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 }, + { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 }, + { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 }, + { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 }, + { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 }, + { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 }, + { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 }, + { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 }, + { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 }, + { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 }, + { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 }, + { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 }, + { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 }, + { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 }, + { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 }, + { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 }, + { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 }, + { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 }, + { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 }, + { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 }, + { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 }, + { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 }, + { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 }, + { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 }, + { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 }, + { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 }, + { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 }, + { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 }, + { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 }, + { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 }, + { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 }, + { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 }, + { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 }, + { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 }, + { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 }, + { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 }, + { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 }, + { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 }, + { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 }, + { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 }, + { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 }, + { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 }, + { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 }, + { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 }, + { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 }, + { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 }, + { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 }, + { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 }, + { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 }, + { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 }, + { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 }, + { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 }, + { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 }, + { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 }, + { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 }, + { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 }, + { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 }, + { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 }, + { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 }, + { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 }, + { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 }, + { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 }, + { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 }, + { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 }, + { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 }, + { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 }, + { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 }, + { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 }, + { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 }, + { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 }, + { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 }, + { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 }, + { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 }, + { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 }, + { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 }, + { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 }, + { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 }, + { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 }, + { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 }, + { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 }, + { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 }, + { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 }, + { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 }, + { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 }, + { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 }, + { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 }, + { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 }, + { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 }, + { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 }, + { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 }, + { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 }, + { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 }, + { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 }, + { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 }, + { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 }, + { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 }, + { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 }, + { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 }, + { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 }, + { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 }, + { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 }, + { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 }, + { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 }, + { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 }, + { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 }, + { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 }, + { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 }, + { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 }, + { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 }, + { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 }, + { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 }, + { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 }, + { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 }, + { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 }, + { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 }, + { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 }, + { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 }, + { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 }, + { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 }, + { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 }, + { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 }, + { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 }, + { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 }, + { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 }, + { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 }, + { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 }, + { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 }, + { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 }, + { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 }, + { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 }, + { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 }, + { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 }, + { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 }, + { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 }, + { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 }, + { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 }, + { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 }, + { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 }, + { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 }, + { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 }, + { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 }, + { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 }, + { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 }, + { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 }, + { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 }, + { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 }, + { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 }, + { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 }, + { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 }, + { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 }, + { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 }, + { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 }, + { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 }, + { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 }, + { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 }, + { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 }, + { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 }, + { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 }, + { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 }, + { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 }, + { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 }, + { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 }, + { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 }, + { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 }, + { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 }, + { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 }, + { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 }, + { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 }, + { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 }, + { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 }, + { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 }, + { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 }, + { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 }, + { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 }, + { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 }, + { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 }, + { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 }, + { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 }, + { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 }, + { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 }, + { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 }, + { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 }, + { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 }, + { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 }, + { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 }, + { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 }, + { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 }, + { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 }, + { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 }, + { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 }, + { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 }, + { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 }, + { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 }, + { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 }, + { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 }, + { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 }, + { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 }, + { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 }, + { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 }, + { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 }, + { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 }, + { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 }, + { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 }, + { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 }, + { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 }, + { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 }, + { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 }, + { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 }, + { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 }, + { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 }, + { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 }, + { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 }, + { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 }, + { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 }, + { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 }, + { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 }, + { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 }, + { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 }, + { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 }, + { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 }, + { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 }, + { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 }, + { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 }, + { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 }, + { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 }, + { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 }, + { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 }, + { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 }, + { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 }, + { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 }, + { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 }, + { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 }, + { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 }, + { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 }, + { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 }, + + // AVX-512 masked foldable instructions + { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, + { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 }, + { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 }, + { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 }, + { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 }, + { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 }, + { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 }, + { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 }, + { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 }, + { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 }, + { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 }, + { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 }, + { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 }, + { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 }, + { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 }, + { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 }, + { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 }, + { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 }, + { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 }, + { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 }, + { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 }, + { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 }, + { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 }, + { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 }, + { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 }, + { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 }, + { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 }, + { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 }, + { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 }, + { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 }, + { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 }, + { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 }, + { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 }, + + // AVX-512VL 256-bit masked foldable instructions + { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, + { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE }, + { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 }, + { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 }, + { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 }, + { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 }, + { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 }, + { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 }, + { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 }, + { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 }, + { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 }, + { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 }, + { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 }, + { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 }, + { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 }, + { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 }, + { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE }, + { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 }, + { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 }, + { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 }, + { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 }, + { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 }, + { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 }, + { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 }, + { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 }, + { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 }, + { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 }, + { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 }, + { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 }, + + // AVX-512VL 128-bit masked foldable instructions + { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, + { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 }, + { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 }, + { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 }, + { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 }, + { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 }, + { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 }, + { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE }, + { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE }, + { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 }, + { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 }, + { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 }, + { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 }, + { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 }, + { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 }, + { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 }, + { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 }, + { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 }, + { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 }, + { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 }, + { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 }, + + // AES foldable instructions + { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, + { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 }, + { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 }, + { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 }, + { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 }, + { X86::VAESDECrr, X86::VAESDECrm, 0 }, + { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 }, + { X86::VAESENCrr, X86::VAESENCrm, 0 }, + + // SHA foldable instructions + { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 }, + { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 }, + { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 }, + { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 }, + { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 }, + { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 }, + { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 } + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) { AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, @@ -150,12 +2439,1105 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD); } + static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE }, + { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE }, + { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE }, + { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE }, + { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE }, + { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE }, + { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE }, + { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE }, + { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE }, + { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE }, + { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE }, + + // XOP foldable instructions + { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 }, + { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 }, + { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 }, + { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 }, + { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 }, + { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 }, + { X86::VPPERMrrr, X86::VPPERMrrm, 0 }, + + // AVX-512 instructions with 3 source operands. + { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 }, + { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, + { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, + { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, + { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, + { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 }, + { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 }, + { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 }, + { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 }, + { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 }, + { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 }, + { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 }, + { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 }, + { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 }, + + // AVX-512VL 256-bit instructions with 3 source operands. + { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 }, + { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 }, + { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 }, + { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 }, + { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 }, + { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 }, + { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 }, + { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 }, + { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 }, + { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 }, + { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 }, + { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 }, + { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 }, + { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 }, + + // AVX-512VL 128-bit instructions with 3 source operands. + { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 }, + { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 }, + { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 }, + { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 }, + { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 }, + { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 }, + { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 }, + { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 }, + { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 }, + { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 }, + { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 }, + { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 }, + { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 }, + { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 }, + + // AVX-512 masked instructions + { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 }, + { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 }, + { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 }, + { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 }, + { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 }, + { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 }, + { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 }, + { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 }, + { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 }, + { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 }, + { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 }, + { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 }, + { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 }, + { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 }, + { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 }, + { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 }, + { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 }, + { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 }, + { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 }, + { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 }, + { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 }, + { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 }, + { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 }, + { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 }, + { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 }, + { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 }, + { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 }, + { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 }, + { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 }, + { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 }, + { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 }, + { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 }, + { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 }, + { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 }, + { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 }, + { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 }, + { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 }, + { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 }, + { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 }, + { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 }, + { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 }, + { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 }, + { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 }, + { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 }, + { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 }, + { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 }, + { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 }, + { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 }, + { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 }, + { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 }, + { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 }, + { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 }, + { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 }, + { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 }, + { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 }, + { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 }, + { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 }, + { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 }, + { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 }, + { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 }, + { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 }, + { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 }, + { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 }, + { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 }, + { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 }, + { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 }, + { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 }, + { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 }, + { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 }, + { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 }, + { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 }, + { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 }, + { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 }, + { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 }, + { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 }, + { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 }, + { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 }, + { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 }, + { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 }, + { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 }, + { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 }, + { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 }, + { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 }, + { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 }, + { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 }, + { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 }, + { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 }, + { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 }, + { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 }, + { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 }, + { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 }, + { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 }, + { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 }, + { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 }, + { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 }, + { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 }, + { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 }, + { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 }, + { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 }, + { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 }, + { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 }, + { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 }, + { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 }, + { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 }, + { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 }, + { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 }, + { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 }, + { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 }, + { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 }, + { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 }, + { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 }, + { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 }, + { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 }, + { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 }, + { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 }, + { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 }, + { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 }, + { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 }, + { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 }, + { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 }, + { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 }, + { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 }, + { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 }, + { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 }, + { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 }, + { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, + { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 }, + { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE }, + { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 }, + { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 }, + { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 }, + { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 }, + { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 }, + { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 }, + + // AVX-512{F,VL} masked arithmetic instructions 256-bit + { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 }, + { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 }, + { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 }, + { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 }, + { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 }, + { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 }, + { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 }, + { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 }, + { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 }, + { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 }, + { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 }, + { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 }, + { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 }, + { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 }, + { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 }, + { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 }, + { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 }, + { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 }, + { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 }, + { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 }, + { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 }, + { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 }, + { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 }, + { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 }, + { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 }, + { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 }, + { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 }, + { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 }, + { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 }, + { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 }, + { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 }, + { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 }, + { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 }, + { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 }, + { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 }, + { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 }, + { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 }, + { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 }, + { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 }, + { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 }, + { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 }, + { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 }, + { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 }, + { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 }, + { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 }, + { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 }, + { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 }, + { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 }, + { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 }, + { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 }, + { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 }, + { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 }, + { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 }, + { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 }, + { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 }, + { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 }, + { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 }, + { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 }, + { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 }, + { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 }, + { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 }, + { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 }, + { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 }, + { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 }, + { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 }, + { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 }, + { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 }, + { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 }, + { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 }, + { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 }, + { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 }, + { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 }, + { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 }, + { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 }, + { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 }, + { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 }, + { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 }, + { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 }, + { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 }, + { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 }, + { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 }, + { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 }, + { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 }, + { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 }, + { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 }, + { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 }, + { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 }, + { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 }, + { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 }, + { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 }, + { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 }, + { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 }, + { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 }, + { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 }, + { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 }, + { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 }, + { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 }, + { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 }, + { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 }, + { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 }, + { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 }, + { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 }, + { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 }, + { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 }, + { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 }, + { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 }, + { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 }, + { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 }, + { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 }, + { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 }, + { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 }, + { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 }, + { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 }, + { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 }, + { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 }, + { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 }, + { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 }, + { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 }, + { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 }, + { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 }, + { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 }, + { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 }, + { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 }, + { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 }, + { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 }, + + // AVX-512{F,VL} masked arithmetic instructions 128-bit + { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 }, + { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 }, + { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 }, + { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 }, + { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 }, + { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 }, + { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 }, + { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 }, + { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 }, + { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 }, + { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 }, + { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 }, + { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }, + { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 }, + { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 }, + { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 }, + { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 }, + { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 }, + { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 }, + { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 }, + { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 }, + { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 }, + { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 }, + { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 }, + { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 }, + { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 }, + { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 }, + { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 }, + { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 }, + { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 }, + { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 }, + { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 }, + { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 }, + { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 }, + { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 }, + { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 }, + { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 }, + { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 }, + { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 }, + { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 }, + { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 }, + { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 }, + { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 }, + { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 }, + { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 }, + { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 }, + { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 }, + { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 }, + { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 }, + { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 }, + { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 }, + { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 }, + { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 }, + { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 }, + { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 }, + { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 }, + { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 }, + { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 }, + { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 }, + { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 }, + { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 }, + { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 }, + { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 }, + { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 }, + { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 }, + { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 }, + { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 }, + { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 }, + { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 }, + { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 }, + { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 }, + { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 }, + { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 }, + { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 }, + { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 }, + { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 }, + { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 }, + { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 }, + { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 }, + { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 }, + { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 }, + { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 }, + { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 }, + { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 }, + { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 }, + { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 }, + { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 }, + { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 }, + { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 }, + { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 }, + { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 }, + { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 }, + { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 }, + { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 }, + { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 }, + { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 }, + { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 }, + { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 }, + { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 }, + { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 }, + { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 }, + { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 }, + { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 }, + { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 }, + { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 }, + { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 }, + { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 }, + { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 }, + { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 }, + { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 }, + { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 }, + { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 }, + { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 }, + { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 }, + { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 }, + { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 }, + { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 }, + + // AVX-512 masked foldable instructions + { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, + { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 }, + { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 }, + { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 }, + { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 }, + { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 }, + { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 }, + { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 }, + { X86::VPERMQZrik, X86::VPERMQZmik, 0 }, + { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 }, + { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE }, + { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 }, + { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 }, + { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 }, + { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 }, + { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 }, + { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE }, + { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 }, + { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 }, + { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 }, + { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 }, + { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 }, + { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 }, + { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 }, + { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 }, + { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 }, + { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 }, + { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 }, + { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 }, + { X86::VPSRADZrik, X86::VPSRADZmik, 0 }, + { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 }, + { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 }, + { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 }, + { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 }, + { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 }, + + // AVX-512VL 256-bit masked foldable instructions + { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, + { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, + { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 }, + { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 }, + { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 }, + { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 }, + { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 }, + { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 }, + { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 }, + { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 }, + { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 }, + { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 }, + { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 }, + { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 }, + { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 }, + { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 }, + { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE }, + { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 }, + { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 }, + { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 }, + { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 }, + { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 }, + { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 }, + { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 }, + { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 }, + { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 }, + { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 }, + { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 }, + { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 }, + + // AVX-512VL 128-bit masked foldable instructions + { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }, + { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 }, + { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 }, + { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 }, + { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 }, + { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 }, + { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 }, + { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE }, + { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE }, + { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 }, + { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 }, + { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 }, + { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 }, + { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 }, + { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 }, + { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 }, + { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 }, + { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 }, + { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 }, + { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 }, + { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 }, + }; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, // Index 3, folded load Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } + auto I = X86InstrFMA3Info::rm_begin(); + auto E = X86InstrFMA3Info::rm_end(); + for (; I != E; ++I) { + if (!I.getGroup()->isKMasked()) { + // Intrinsic forms need to pass TB_NO_REVERSE. + if (I.getGroup()->isIntrinsic()) { + AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE); + } else { + AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD); + } + } + } + + static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { + // AVX-512 foldable masked instructions + { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 }, + { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 }, + { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE }, + { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE }, + { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 }, + { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 }, + { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 }, + { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 }, + { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 }, + { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 }, + { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 }, + { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 }, + { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE }, + { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE }, + { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 }, + { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 }, + { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 }, + { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 }, + { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 }, + { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 }, + { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 }, + { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 }, + { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 }, + { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 }, + { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 }, + { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 }, + { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 }, + { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 }, + { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 }, + { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 }, + { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 }, + { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 }, + { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 }, + { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 }, + { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 }, + { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 }, + { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE }, + { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE }, + { X86::VORPDZrrk, X86::VORPDZrmk, 0 }, + { X86::VORPSZrrk, X86::VORPSZrmk, 0 }, + { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 }, + { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 }, + { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 }, + { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 }, + { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 }, + { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 }, + { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 }, + { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 }, + { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 }, + { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 }, + { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 }, + { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 }, + { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 }, + { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 }, + { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 }, + { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 }, + { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 }, + { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 }, + { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 }, + { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 }, + { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 }, + { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 }, + { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 }, + { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 }, + { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 }, + { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 }, + { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 }, + { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 }, + { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 }, + { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 }, + { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 }, + { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 }, + { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 }, + { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 }, + { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 }, + { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 }, + { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 }, + { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 }, + { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 }, + { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 }, + { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 }, + { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 }, + { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 }, + { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 }, + { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 }, + { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 }, + { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 }, + { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 }, + { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 }, + { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 }, + { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 }, + { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 }, + { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 }, + { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 }, + { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 }, + { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 }, + { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 }, + { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 }, + { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 }, + { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 }, + { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 }, + { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 }, + { X86::VPORDZrrk, X86::VPORDZrmk, 0 }, + { X86::VPORQZrrk, X86::VPORQZrmk, 0 }, + { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 }, + { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 }, + { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 }, + { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 }, + { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 }, + { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 }, + { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 }, + { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 }, + { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 }, + { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 }, + { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 }, + { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 }, + { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 }, + { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 }, + { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 }, + { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 }, + { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 }, + { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 }, + { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 }, + { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 }, + { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 }, + { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 }, + { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 }, + { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 }, + { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 }, + { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 }, + { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 }, + { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 }, + { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 }, + { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 }, + { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 }, + { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 }, + { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 }, + { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 }, + { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 }, + { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 }, + { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 }, + { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 }, + { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 }, + { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 }, + { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 }, + { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 }, + { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE }, + { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE }, + { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 }, + { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 }, + { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 }, + { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 }, + { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 }, + { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 }, + + // AVX-512{F,VL} foldable masked instructions 256-bit + { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 }, + { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 }, + { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 }, + { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 }, + { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 }, + { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 }, + { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 }, + { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 }, + { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 }, + { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 }, + { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 }, + { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 }, + { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 }, + { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 }, + { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 }, + { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 }, + { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 }, + { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 }, + { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 }, + { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 }, + { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 }, + { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 }, + { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 }, + { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 }, + { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 }, + { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 }, + { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 }, + { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 }, + { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 }, + { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 }, + { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 }, + { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 }, + { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 }, + { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 }, + { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 }, + { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 }, + { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 }, + { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 }, + { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 }, + { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 }, + { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 }, + { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 }, + { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 }, + { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 }, + { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 }, + { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 }, + { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 }, + { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 }, + { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 }, + { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 }, + { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 }, + { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 }, + { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 }, + { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 }, + { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 }, + { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 }, + { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 }, + { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 }, + { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 }, + { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 }, + { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 }, + { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 }, + { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 }, + { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 }, + { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 }, + { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 }, + { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 }, + { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 }, + { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 }, + { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 }, + { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 }, + { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 }, + { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 }, + { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 }, + { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 }, + { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 }, + { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 }, + { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 }, + { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 }, + { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 }, + { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 }, + { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 }, + { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 }, + { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 }, + { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 }, + { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 }, + { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 }, + { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 }, + { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 }, + { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 }, + { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 }, + { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 }, + { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 }, + { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 }, + { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 }, + { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 }, + { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 }, + { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 }, + { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 }, + { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 }, + { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 }, + { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 }, + { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 }, + { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 }, + { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 }, + { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 }, + { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 }, + { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 }, + { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 }, + { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 }, + { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 }, + { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 }, + { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 }, + { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 }, + { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 }, + { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 }, + { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 }, + { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 }, + { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 }, + { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 }, + { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 }, + { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 }, + { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 }, + { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 }, + { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 }, + { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 }, + { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 }, + { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 }, + { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 }, + { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 }, + { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 }, + { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 }, + { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 }, + { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 }, + { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 }, + { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 }, + { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 }, + { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 }, + { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 }, + + // AVX-512{F,VL} foldable instructions 128-bit + { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 }, + { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 }, + { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 }, + { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 }, + { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 }, + { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 }, + { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 }, + { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 }, + { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 }, + { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 }, + { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 }, + { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 }, + { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }, + { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 }, + { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 }, + { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 }, + { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 }, + { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 }, + { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 }, + { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 }, + { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 }, + { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 }, + { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 }, + { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 }, + { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 }, + { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 }, + { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 }, + { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 }, + { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 }, + { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 }, + { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 }, + { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 }, + { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 }, + { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 }, + { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 }, + { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 }, + { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 }, + { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 }, + { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 }, + { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 }, + { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 }, + { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 }, + { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 }, + { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 }, + { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 }, + { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 }, + { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 }, + { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 }, + { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 }, + { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 }, + { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 }, + { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 }, + { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 }, + { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 }, + { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 }, + { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 }, + { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 }, + { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 }, + { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 }, + { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 }, + { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 }, + { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 }, + { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 }, + { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 }, + { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 }, + { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 }, + { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 }, + { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 }, + { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 }, + { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 }, + { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 }, + { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 }, + { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 }, + { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 }, + { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 }, + { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 }, + { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 }, + { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 }, + { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 }, + { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 }, + { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 }, + { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 }, + { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 }, + { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 }, + { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 }, + { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 }, + { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 }, + { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 }, + { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 }, + { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 }, + { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 }, + { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 }, + { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 }, + { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 }, + { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 }, + { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 }, + { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 }, + { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 }, + { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 }, + { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 }, + { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 }, + { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 }, + { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 }, + { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 }, + { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 }, + { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 }, + { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 }, + { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 }, + { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 }, + { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 }, + { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 }, + { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 }, + { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 }, + { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 }, + { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 }, + { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 }, + { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 }, + { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 }, + { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 }, + { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 }, + { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 }, + { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 }, + { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 }, + { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 }, + { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 }, + { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 }, + { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 }, + { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 }, + { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 }, + { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 }, + { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 }, + + // 512-bit three source instructions with zero masking. + { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 }, + { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 }, + { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 }, + { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 }, + { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 }, + { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 }, + { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 }, + { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 }, + { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 }, + { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 }, + { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 }, + { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 }, + { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 }, + { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 }, + + // 256-bit three source instructions with zero masking. + { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 }, + { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 }, + { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 }, + { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 }, + { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 }, + { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 }, + { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 }, + { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 }, + { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 }, + { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 }, + { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 }, + { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 }, + { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 }, + { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 }, + + // 128-bit three source instructions with zero masking. + { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 }, + { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 }, + { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 }, + { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 }, + { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 }, + { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 }, + { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 }, + { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 }, + { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 }, + { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 }, + { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 }, + { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 }, + { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 }, + { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 }, + }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) { AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, @@ -163,6 +3545,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // Index 4, folded load Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD); } + for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) { + if (I.getGroup()->isKMasked()) { + // Intrinsics need to pass TB_NO_REVERSE. + if (I.getGroup()->isIntrinsic()) { + AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE); + } else { + AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, + I.getRegOpcode(), I.getMemOpcode(), + TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD); + } + } + } } void diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a3e677209305..8490b972eb5c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5183,14 +5183,14 @@ multiclass S3D_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3DI, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, @@ -5200,14 +5200,14 @@ multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3I, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -5310,7 +5310,7 @@ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// -let Sched = WriteVecALU in { +let Sched = WritePHAdd in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 598d88d8b9c3..33bc8e11a572 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,20 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "X86AsmPrinter.h" -#include "X86RegisterInfo.h" -#include "X86ShuffleDecodeConstantPool.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "InstPrinter/X86InstComments.h" #include "MCTargetDesc/X86BaseInfo.h" #include "Utils/X86ShuffleDecode.h" +#include "X86AsmPrinter.h" +#include "X86RegisterInfo.h" +#include "X86ShuffleDecodeConstantPool.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -38,13 +39,12 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp index aabbf67a16b6..e6756b975c10 100644 --- a/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -27,8 +27,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 677e82459766..03c8ccb53afe 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -1488,6 +1488,39 @@ def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>; //-- Arithmetic instructions --// +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} +// v <- v,m. +def : WriteRes { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 2, 1]; +} + // PHADD|PHSUB (S) W/D. // v <- v,v. def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> { diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index eca65c2892b7..b8ec5883152c 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -157,6 +157,31 @@ def : WriteRes { let ResourceCycles = [1, 1, 1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes; + +// v <- v,m. +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes { diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 4eae6ca7abe3..a12fa68faf4f 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -77,6 +77,10 @@ defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } +// Horizontal Add/Sub (float and integer) +defm WriteFHAdd : X86SchedWritePair; +defm WritePHAdd : X86SchedWritePair; + // Vector integer operations. defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index ce1ece34e431..6cb2a3694d92 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -319,6 +319,38 @@ def : WriteRes { let ResourceCycles = [1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { + let Latency = 3; +} + +def : WriteRes { + let Latency = 8; +} + +def : WriteRes { + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1]; +} + +def WriteFHAddY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>; + +def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>; + //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index f95d4fa04177..03ed2db2350d 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -137,6 +137,33 @@ defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD + +def : WriteRes { + let Latency = 3; + let ResourceCycles = [2]; +} + +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +// PHADD|PHSUB (S) W/D. +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1]; +} + +def : WriteRes { + let Latency = 4; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes { diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index d4b2392eb1f5..c67aa04aebea 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h" +#include "X86SelectionDAGInfo.h" #include "X86ISelLowering.h" +#include "X86InstrInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" -#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 84ec98484f8e..e36a47506ba0 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86Subtarget.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Attributes.h" diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index cb21f1bd7706..278b57eb00b7 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -24,8 +24,8 @@ #include "X86TargetObjectFile.h" #include "X86TargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 7f70829cb6c6..4fd95717478e 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -9,6 +9,8 @@ #include "X86TargetObjectFile.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCContext.h" @@ -16,8 +18,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index fe94079fd869..11ba7025e1b7 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1383,6 +1383,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } + int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed) { @@ -2176,6 +2178,17 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); } +bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) { + // X86 specific here are "instruction number 1st priority". + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, + C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, + C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); +} + bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { Type *ScalarTy = DataTy->getScalarType(); int DataWidth = isa(ScalarTy) ? diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index 9bef9e80c395..09ce2c90498d 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -76,6 +76,8 @@ public: int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); + unsigned getAtomicMemIntrinsicMaxElementSize() const; + int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX); @@ -99,6 +101,8 @@ public: int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty); + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2); bool isLegalMaskedLoad(Type *DataType); bool isLegalMaskedStore(Type *DataType); bool isLegalMaskedGather(Type *DataType); diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 3ee14a0ff7b1..0c3b34341476 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 5fc58d831319..dd27e7ca30aa 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "InstPrinter/XCoreInstPrinter.h" #include "MCTargetDesc/XCoreMCAsmInfo.h" -#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "XCoreTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDwarf.h" @@ -23,8 +23,8 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index b35aa0b95821..8f7c8a82380a 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "XCore.h" #include "InstPrinter/XCoreInstPrinter.h" +#include "XCore.h" #include "XCoreInstrInfo.h" #include "XCoreMCInstLower.h" #include "XCoreSubtarget.h" diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 1a1cbd474888..cb23399995da 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "XCoreTargetMachine.h" #include "MCTargetDesc/XCoreMCTargetDesc.h" #include "XCore.h" -#include "XCoreTargetMachine.h" #include "XCoreTargetObjectFile.h" #include "XCoreTargetTransformInfo.h" #include "llvm/ADT/Optional.h" diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 2b53f01a996d..a047b3c9d9fc 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H #include "XCoreSubtarget.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index ad8693fd325e..c60a262e719c 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,10 +9,10 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/ToolDrivers/llvm-lib/LLVMBuild.txt b/lib/ToolDrivers/llvm-lib/LLVMBuild.txt index 799dc997c0bb..e4b32ec4af90 100644 --- a/lib/ToolDrivers/llvm-lib/LLVMBuild.txt +++ b/lib/ToolDrivers/llvm-lib/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = LibDriver parent = Libraries -required_libraries = Object Option Support +required_libraries = BinaryFormat Object Option Support diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp index 3bae3826d62e..797e4ffc2d45 100644 --- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp +++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -14,14 +14,15 @@ #include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/StringSaver.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -143,11 +144,10 @@ int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { }); return 1; } - sys::fs::file_magic Magic = - sys::fs::identify_magic(MOrErr->Buf->getBuffer()); - if (Magic != sys::fs::file_magic::coff_object && - Magic != sys::fs::file_magic::bitcode && - Magic != sys::fs::file_magic::windows_resource) { + llvm::file_magic Magic = llvm::identify_magic(MOrErr->Buf->getBuffer()); + if (Magic != llvm::file_magic::coff_object && + Magic != llvm::file_magic::bitcode && + Magic != llvm::file_magic::windows_resource) { llvm::errs() << Arg->getValue() << ": not a COFF object, bitcode or resource file\n"; return 1; diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 626a891f65c6..173dc05f0584 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -22,8 +22,8 @@ #include "CoroInternal.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Scalar.h" diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp index 98c4b1740306..ecff88c88dcb 100644 --- a/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Pass.h" using namespace llvm; #define DEBUG_TYPE "elim-avail-extern" diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 479fd182598a..d1147f7d844b 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include using namespace llvm; @@ -53,18 +53,18 @@ static void makeVisible(GlobalValue &GV, bool Delete) { } namespace { - /// @brief A pass to extract specific functions and their dependencies. + /// @brief A pass to extract specific global values and their dependencies. class GVExtractorPass : public ModulePass { SetVector Named; bool deleteStuff; public: static char ID; // Pass identification, replacement for typeid - /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the - /// specified function. Otherwise, it deletes as much of the module as - /// possible, except for the function specified. - /// - explicit GVExtractorPass(std::vector& GVs, bool deleteS = true) + /// If deleteS is true, this pass deletes the specified global values. + /// Otherwise, it deletes as much of the module as possible, except for the + /// global values specified. + explicit GVExtractorPass(std::vector &GVs, + bool deleteS = true) : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} bool runOnModule(Module &M) override { diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 5cc29a493798..813a4b6e2831 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/FunctionAttrs.h" -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -34,7 +33,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; #define DEBUG_TYPE "functionattrs" diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp index 4705ebe265ae..e47d881d1127 100644 --- a/lib/Transforms/IPO/GlobalSplit.cpp +++ b/lib/Transforms/IPO/GlobalSplit.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/GlobalSplit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Constants.h" @@ -23,6 +22,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index 349807496dc2..f79b61037f1d 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" @@ -24,6 +23,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; #define DEBUG_TYPE "ipconstprop" diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 89518f3c5fae..5bb305ca84d0 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Initialization.h" #include "llvm-c/Transforms/IPO.h" -#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index 2ef299d9a2f0..15d7515cc842 100644 --- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/InferFunctionAttrs.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 673d3af0ab52..c0dfeede05c5 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -519,6 +519,10 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); + // We can only inline direct calls to non-declarations. + if (!Callee || Callee->isDeclaration()) + continue; + // If this call site is dead and it is to a readonly function, we should // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the @@ -531,10 +535,6 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; } else { - // We can only inline direct calls to non-declarations. - if (!Callee || Callee->isDeclaration()) - continue; - // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index f898c3b5a935..c74b0a35e296 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/Dominators.h" @@ -22,6 +21,7 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 7bec50d9d25f..90896d285f5a 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -235,7 +235,6 @@ class LowerTypeTestsModule { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; - bool LinkerSubsectionsViaSymbols; Triple::ArchType Arch; Triple::OSType OS; Triple::ObjectFormatType ObjectFormat; @@ -475,13 +474,9 @@ void LowerTypeTestsModule::allocateByteArrays() { // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures // that the pc-relative displacement is folded into the lea instead of the // test instruction getting another displacement. - if (LinkerSubsectionsViaSymbols) { - BAI->ByteArray->replaceAllUsesWith(GEP); - } else { - GlobalAlias *Alias = GlobalAlias::create( - Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); - BAI->ByteArray->replaceAllUsesWith(Alias); - } + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); + BAI->ByteArray->replaceAllUsesWith(Alias); BAI->ByteArray->eraseFromParent(); } @@ -502,7 +497,7 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, return createMaskedBitTest(B, TIL.InlineBits, BitOffset); } else { Constant *ByteArray = TIL.TheByteArray; - if (!LinkerSubsectionsViaSymbols && AvoidReuse && !ImportSummary) { + if (AvoidReuse && !ImportSummary) { // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. @@ -608,8 +603,25 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, if (TIL.TheKind == TypeTestResolution::AllOnes) return OffsetInRange; - TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false); - IRBuilder<> ThenB(Term); + // See if the intrinsic is used in the following common pattern: + // br(llvm.type.test(...), thenbb, elsebb) + // where nothing happens between the type test and the br. + // If so, create slightly simpler IR. + if (CI->hasOneUse()) + if (auto *Br = dyn_cast(*CI->user_begin())) + if (CI->getNextNode() == Br) { + BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator()); + BasicBlock *Else = Br->getSuccessor(1); + BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange); + NewBr->setMetadata(LLVMContext::MD_prof, + Br->getMetadata(LLVMContext::MD_prof)); + ReplaceInstWithInst(InitialBB->getTerminator(), NewBr); + + IRBuilder<> ThenB(CI); + return createBitSetTest(ThenB, TIL, BitOffset); + } + + IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false)); // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. @@ -680,17 +692,13 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); - if (LinkerSubsectionsViaSymbols) { - GV->replaceAllUsesWith(CombinedGlobalElemPtr); - } else { - assert(GV->getType()->getAddressSpace() == 0); - GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, - GV->getLinkage(), "", - CombinedGlobalElemPtr, &M); - GAlias->setVisibility(GV->getVisibility()); - GAlias->takeName(GV); - GV->replaceAllUsesWith(GAlias); - } + assert(GV->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = + GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(), + "", CombinedGlobalElemPtr, &M); + GAlias->setVisibility(GV->getVisibility()); + GAlias->takeName(GV); + GV->replaceAllUsesWith(GAlias); GV->eraseFromParent(); } } @@ -1166,8 +1174,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ArrayRef{ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, I)}), F->getType()); - if (LinkerSubsectionsViaSymbols || F->isDeclarationForLinker()) { - + if (F->isDeclarationForLinker()) { if (F->isWeakForLinker()) replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr); else @@ -1302,7 +1309,6 @@ LowerTypeTestsModule::LowerTypeTestsModule( : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { assert(!(ExportSummary && ImportSummary)); Triple TargetTriple(M.getTargetTriple()); - LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); Arch = TargetTriple.getArch(); OS = TargetTriple.getOS(); ObjectFormat = TargetTriple.getObjectFormat(); diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index d9acb9b1a743..3fd59847a005 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -14,10 +14,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/EHPersonalities.h" @@ -28,6 +26,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index e755e2bd8f26..67bc8f5f6b7a 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -695,6 +695,13 @@ bool SampleProfileLoader::inlineHotFunctions( CallSite(I).isIndirectCall()) for (const auto *FS : findIndirectCallFunctionSamples(*I)) { auto CalleeFunctionName = FS->getName(); + // If it is a recursive call, we do not inline it as it could bloat + // the code exponentially. There is way to better handle this, e.g. + // clone the caller first, and inline the cloned caller if it is + // recursive. As llvm does not inline recursive calls, we will simply + // ignore it instead of handling it explicitly. + if (CalleeFunctionName == F.getName()) + continue; const char *Reason = "Callee function not available"; auto R = SymbolMap.find(CalleeFunctionName); if (R == SymbolMap.end()) diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index fb64367eef91..de1b51e206ff 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -30,6 +29,7 @@ #include "llvm/IR/TypeFinder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 9dede4cedd1d..a7bcc7cc5532 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -318,6 +318,12 @@ void splitAndWriteThinLTOBitcode( ProfileSummaryInfo PSI(M); ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); + // Mark the merged module as requiring full LTO. We still want an index for + // it though, so that it can participate in summary-based dead stripping. + MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + ModuleSummaryIndex MergedMIndex = + buildModuleSummaryIndex(*MergedM, nullptr, &PSI); + SmallVector Buffer; BitcodeWriter W(Buffer); @@ -327,7 +333,8 @@ void splitAndWriteThinLTOBitcode( ModuleHash ModHash = {{0}}; W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/true, &ModHash); - W.writeModule(MergedM.get()); + W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, + &MergedMIndex); W.writeStrtab(); OS << Buffer; @@ -340,7 +347,8 @@ void splitAndWriteThinLTOBitcode( StripDebugInfo(M); W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/false, &ModHash); - W2.writeModule(MergedM.get()); + W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, + &MergedMIndex); W2.writeStrtab(); *ThinLinkOS << Buffer; } diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp index aae22c5457ba..00769cd63229 100644 --- a/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -46,9 +46,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/TypeMetadataUtils.h" diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 7204bf517681..287a5167fe2a 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -991,8 +991,9 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add, // Shifts and add used to flip and mask off the low bit: // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 const APInt *C3; - if (*C == 1 && match(Op0, m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), - m_APInt(C3)))) && + if (C->isOneValue() && + match(Op0, + m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3)))) && C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) { Value *NotX = Builder.CreateNot(X); return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); @@ -1008,8 +1009,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), SQ)) + if (Value *V = + SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc @@ -1294,7 +1296,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ)) + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (isa(RHS)) @@ -1484,8 +1487,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), SQ)) + if (Value *V = + SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1554,7 +1558,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) - if (*Op0C == 0) { + if (Op0C->isNullValue()) { Value *X; const APInt *ShAmt; if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) && @@ -1690,7 +1694,8 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ)) + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // fsub nsz 0, X ==> fsub nsz -0.0, X diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 1f8319efb3be..4fe3225a2172 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -172,12 +172,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, const APInt& AddRHS = OpRHS->getValue(); // Check to see if any bits below the one bit set in AndRHSV are set. - if ((AddRHS & (AndRHSV-1)) == 0) { + if ((AddRHS & (AndRHSV - 1)).isNullValue()) { // If not, the only thing that can effect the output of the AND is // the bit specified by AndRHSV. If that bit is set, the effect of // the XOR is to toggle the bit. If it is clear, then the ADD has // no effect. - if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop + if ((AddRHS & AndRHSV).isNullValue()) { // Bit is not set, noop TheAnd.setOperand(0, X); return &TheAnd; } else { @@ -641,7 +641,7 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // If there is a conflict, we should actually return a false for the // whole construct. if (((BCst->getValue() & DCst->getValue()) & - (CCst->getValue() ^ ECst->getValue())) != 0) + (CCst->getValue() ^ ECst->getValue())).getBoolValue()) return ConstantInt::get(LHS->getType(), !IsAnd); Value *NewOr1 = Builder->CreateOr(B, D); @@ -748,7 +748,7 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, // Special case: get the ordering right when the values wrap around zero. // Ie, we assumed the constants were unsigned when swapping earlier. - if (*C1 == 0 && C2->isAllOnesValue()) + if (C1->isNullValue() && C2->isAllOnesValue()) std::swap(C1, C2); if (*C1 == *C2 - 1) { @@ -840,7 +840,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // Check that the low bits are zero. APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); - if ((Low & AndC->getValue()) == 0 && (Low & BigC->getValue()) == 0) { + if ((Low & AndC->getValue()).isNullValue() && + (Low & BigC->getValue()).isNullValue()) { Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue()); APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue(); Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N); @@ -1234,7 +1235,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, SQ)) + if (Value *V = SimplifyAndInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -1286,7 +1287,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } case Instruction::Sub: // -x & 1 -> x & 1 - if (AndRHSMask == 1 && match(Op0LHS, m_Zero())) + if (AndRHSMask.isOneValue() && match(Op0LHS, m_Zero())) return BinaryOperator::CreateAnd(Op0RHS, AndRHS); break; @@ -1295,7 +1296,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { case Instruction::LShr: // (1 << x) & 1 --> zext(x == 0) // (1 >> x) & 1 --> zext(x == 0) - if (AndRHSMask == 1 && Op0LHS == AndRHS) { + if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) { Value *NewICmp = Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); return new ZExtInst(NewICmp, I.getType()); @@ -1962,7 +1963,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, SQ)) + if (Value *V = SimplifyOrInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2033,7 +2034,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ConstantInt *C1 = dyn_cast(C); ConstantInt *C2 = dyn_cast(D); if (C1 && C2) { // (A & C1)|(B & C2) - if ((C1->getValue() & C2->getValue()) == 0) { + if ((C1->getValue() & C2->getValue()).isNullValue()) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 if (match(A, m_Or(m_Value(V1), m_Value(V2))) && @@ -2056,9 +2057,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. ConstantInt *C3 = nullptr, *C4 = nullptr; if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && - (C3->getValue() & ~C1->getValue()) == 0 && + (C3->getValue() & ~C1->getValue()).isNullValue() && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && - (C4->getValue() & ~C2->getValue()) == 0) { + (C4->getValue() & ~C2->getValue()).isNullValue()) { V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, Builder->getInt(C1->getValue()|C2->getValue())); @@ -2344,7 +2345,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, SQ)) + if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *NewXor = foldXorToXor(I)) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index b44499ec4be9..d29ed49eca0b 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -16,9 +16,9 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -393,7 +393,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, unsigned BitWidth = SVT->getPrimitiveSizeInBits(); // If shift-by-zero then just return the original value. - if (Count == 0) + if (Count.isNullValue()) return Vec; // Handle cases when Shift >= BitWidth. @@ -1373,10 +1373,6 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { II.getIntrinsicID() == Intrinsic::ctlz) && "Expected cttz or ctlz intrinsic"); Value *Op0 = II.getArgOperand(0); - // FIXME: Try to simplify vectors of integers. - auto *IT = dyn_cast(Op0->getType()); - if (!IT) - return nullptr; KnownBits Known = IC.computeKnownBits(Op0, 0, &II); @@ -1392,14 +1388,14 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. if (PossibleZeros == DefiniteZeros) { - auto *C = ConstantInt::get(IT, DefiniteZeros); + auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros); return IC.replaceInstUsesWith(II, C); } // If the input to cttz/ctlz is known to be non-zero, // then change the 'ZeroIsUndef' parameter to 'true' // because we know the zero behavior can't affect the result. - if (Known.One != 0 || + if (!Known.One.isNullValue() || isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, &IC.getDominatorTree())) { if (!match(II.getArgOperand(1), m_One())) { @@ -1818,8 +1814,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto Args = CI.arg_operands(); - if (Value *V = - SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ)) + if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(), + Args.end(), SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 766939c56dff..38e95fb11639 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -661,7 +661,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // zext (x x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. - if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || + if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) || (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { if (!DoTransform) return ICI; @@ -688,7 +688,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: @@ -699,7 +699,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (!DoTransform) return ICI; bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; - if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { + if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), @@ -717,7 +717,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, In->getName() + ".lobit"); } - if ((Op1CV != 0) == isNE) { // Toggle the low bit. + if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder->CreateXor(In, One); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index c0798e164c39..1ef4acfb058c 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -127,7 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, switch (Pred) { case ICmpInst::ICMP_SLT: // True if LHS s< 0 TrueIfSigned = true; - return RHS == 0; + return RHS.isNullValue(); case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 TrueIfSigned = true; return RHS.isAllOnesValue(); @@ -155,10 +155,10 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { if (!ICmpInst::isSigned(Pred)) return false; - if (C == 0) + if (C.isNullValue()) return ICmpInst::isRelational(Pred); - if (C == 1) { + if (C.isOneValue()) { if (Pred == ICmpInst::ICMP_SLT) { Pred = ICmpInst::ICMP_SLE; return true; @@ -1193,7 +1193,7 @@ Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A, }; // Don't bother doing any work for cases which InstSimplify handles. - if (AP2 == 0) + if (AP2.isNullValue()) return nullptr; bool IsAShr = isa(I.getOperand(0)); @@ -1252,7 +1252,7 @@ Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A, }; // Don't bother doing any work for cases which InstSimplify handles. - if (AP2 == 0) + if (AP2.isNullValue()) return nullptr; unsigned AP2TrailingZeros = AP2.countTrailingZeros(); @@ -1399,7 +1399,7 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { } // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) - if (*C == 0 && Pred == ICmpInst::ICMP_SGT) { + if (C->isNullValue() && Pred == ICmpInst::ICMP_SGT) { SelectPatternResult SPR = matchSelectPattern(X, A, B); if (SPR.Flavor == SPF_SMIN) { if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) @@ -1465,7 +1465,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, const APInt *C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); Value *X = Trunc->getOperand(0); - if (*C == 1 && C->getBitWidth() > 1) { + if (C->isOneValue() && C->getBitWidth() > 1) { // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V)))) @@ -1505,7 +1505,7 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, // If this is a comparison that tests the signbit (X < 0) or (x > -1), // fold the xor. ICmpInst::Predicate Pred = Cmp.getPredicate(); - if ((Pred == ICmpInst::ICMP_SLT && *C == 0) || + if ((Pred == ICmpInst::ICMP_SLT && C->isNullValue()) || (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue())) { // If the sign bit of the XorCst is not set, there is no change to @@ -1623,7 +1623,7 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is // preferable because it allows the C2 << Y expression to be hoisted out of a // loop if Y is invariant and X is not. - if (Shift->hasOneUse() && *C1 == 0 && Cmp.isEquality() && + if (Shift->hasOneUse() && C1->isNullValue() && Cmp.isEquality() && !Shift->isArithmeticShift() && !isa(Shift->getOperand(0))) { // Compute C2 << Y. Value *NewShift = @@ -1681,7 +1681,8 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, // (icmp pred (and A, (or (shl 1, B), 1), 0)) // // iff pred isn't signed - if (!Cmp.isSigned() && *C1 == 0 && match(And->getOperand(1), m_One())) { + if (!Cmp.isSigned() && C1->isNullValue() && + match(And->getOperand(1), m_One())) { Constant *One = cast(And->getOperand(1)); Value *Or = And->getOperand(0); Value *A, *B, *LShr; @@ -1764,7 +1765,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, // (X & C2) != 0 -> (trunc X) < 0 // iff C2 is a power of 2 and it masks the sign bit of a legal integer type. const APInt *C2; - if (And->hasOneUse() && *C == 0 && match(Y, m_APInt(C2))) { + if (And->hasOneUse() && C->isNullValue() && match(Y, m_APInt(C2))) { int32_t ExactLogBase2 = C2->exactLogBase2(); if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); @@ -1784,7 +1785,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, const APInt *C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); - if (*C == 1) { + if (C->isOneValue()) { // icmp slt signum(V) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V)))) @@ -1801,7 +1802,7 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1)); } - if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse()) + if (!Cmp.isEquality() || !C->isNullValue() || !Or->hasOneUse()) return nullptr; Value *P, *Q; @@ -2036,7 +2037,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0 Value *X = Shr->getOperand(0); CmpInst::Predicate Pred = Cmp.getPredicate(); - if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && *C == 0) + if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && + C->isNullValue()) return new ICmpInst(Pred, X, Cmp.getOperand(1)); const APInt *ShiftVal; @@ -2127,7 +2129,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, if (!match(UDiv->getOperand(0), m_APInt(C2))) return nullptr; - assert(C2 != 0 && "udiv 0, X should have been simplified already."); + assert(*C2 != 0 && "udiv 0, X should have been simplified already."); // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1)) Value *Y = UDiv->getOperand(1); @@ -2140,7 +2142,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C) if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) { - assert(C != 0 && "icmp ult X, 0 should have been simplified already."); + assert(*C != 0 && "icmp ult X, 0 should have been simplified already."); return new ICmpInst(ICmpInst::ICMP_UGT, Y, ConstantInt::get(Y->getType(), C2->udiv(*C))); } @@ -2178,7 +2180,8 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, // INT_MIN will also fail if the divisor is 1. Although folds of all these // division-by-constant cases should be present, we can not assert that they // have happened before we reach this icmp instruction. - if (*C2 == 0 || *C2 == 1 || (DivIsSigned && C2->isAllOnesValue())) + if (C2->isNullValue() || C2->isOneValue() || + (DivIsSigned && C2->isAllOnesValue())) return nullptr; // TODO: We could do all of the computations below using APInt. @@ -2224,7 +2227,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false); } } else if (C2->isStrictlyPositive()) { // Divisor is > 0. - if (*C == 0) { // (X / pos) op 0 + if (C->isNullValue()) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); HiBound = RangeSize; @@ -2245,7 +2248,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, } else if (C2->isNegative()) { // Divisor is < 0. if (Div->isExact()) RangeSize = ConstantExpr::getNeg(RangeSize); - if (*C == 0) { // (X / neg) op 0 + if (C->isNullValue()) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) LoBound = AddOne(RangeSize); HiBound = ConstantExpr::getNeg(RangeSize); @@ -2337,15 +2340,15 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y) - if (Pred == ICmpInst::ICMP_SGT && *C == 0) + if (Pred == ICmpInst::ICMP_SGT && C->isNullValue()) return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y) - if (Pred == ICmpInst::ICMP_SLT && *C == 0) + if (Pred == ICmpInst::ICMP_SLT && C->isNullValue()) return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y) - if (Pred == ICmpInst::ICMP_SLT && *C == 1) + if (Pred == ICmpInst::ICMP_SLT && C->isOneValue()) return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); } @@ -2520,7 +2523,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, switch (BO->getOpcode()) { case Instruction::SRem: // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (*C == 0 && BO->hasOneUse()) { + if (C->isNullValue() && BO->hasOneUse()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName()); @@ -2537,7 +2540,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, Constant *SubC = ConstantExpr::getSub(RHS, cast(BOp1)); return new ICmpInst(Pred, BOp0, SubC); } - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. if (Value *NegVal = dyn_castNegVal(BOp1)) @@ -2558,7 +2561,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // For the xor case, we can xor two constants together, eliminating // the explicit xor. return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC)); - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((xor A, B) != 0) with (A != B) return new ICmpInst(Pred, BOp0, BOp1); } @@ -2571,7 +2574,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // Replace ((sub BOC, B) != C) with (B != BOC-C). Constant *SubC = ConstantExpr::getSub(cast(BOp0), RHS); return new ICmpInst(Pred, BOp1, SubC); - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((sub A, B) != 0) with (A != B). return new ICmpInst(Pred, BOp0, BOp1); } @@ -2609,7 +2612,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, } // ((X & ~7) == 0) --> X < 8 - if (*C == 0 && (~(*BOC) + 1).isPowerOf2()) { + if (C->isNullValue() && (~(*BOC) + 1).isPowerOf2()) { Constant *NegBOC = ConstantExpr::getNeg(cast(BOp1)); auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(NewPred, BOp0, NegBOC); @@ -2618,9 +2621,9 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, break; } case Instruction::Mul: - if (*C == 0 && BO->hasNoSignedWrap()) { + if (C->isNullValue() && BO->hasNoSignedWrap()) { const APInt *BOC; - if (match(BOp1, m_APInt(BOC)) && *BOC != 0) { + if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) { // The trivial case (mul X, 0) is handled by InstSimplify. // General case : (mul X, C) != 0 iff X != 0 // (mul X, C) == 0 iff X == 0 @@ -2629,7 +2632,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, } break; case Instruction::UDiv: - if (*C == 0) { + if (C->isNullValue()) { // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A) auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; return new ICmpInst(NewPred, BOp1, BOp0); @@ -2668,7 +2671,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != - bool IsZero = *C == 0; + bool IsZero = C->isNullValue(); if (IsZero || *C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); @@ -3057,7 +3060,8 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { break; const APInt *C; - if (match(BO0->getOperand(1), m_APInt(C)) && *C != 0 && *C != 1) { + if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() && + !C->isOneValue()) { // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask) // Mask = -1 >> count-trailing-zeros(C). if (unsigned TZs = C->countTrailingZeros()) { @@ -4093,7 +4097,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // Check if the LHS is 8 >>u x and the result is a power of 2 like 1. const APInt *CI; - if (Op0KnownZeroInverted == 1 && + if (Op0KnownZeroInverted.isOneValue() && match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) { // ((8 >>u X) & 1) == 0 -> X != 3 // ((8 >>u X) & 1) != 0 -> X == 3 diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 56f133de3de1..fd0a64a5bbb5 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -21,6 +21,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -29,7 +30,6 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 4d408359eeea..365c4ba75154 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -176,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyMulInst(Op0, Op1, SQ)) + if (Value *V = SimplifyMulInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -599,7 +599,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (isa(Op0)) std::swap(Op0, Op1); - if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ)) + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); bool AllowReassociate = I.hasUnsafeAlgebra(); @@ -930,7 +931,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } - if (*C2 != 0) // avoid X udiv 0 + if (!C2->isNullValue()) // avoid X udiv 0 if (Instruction *FoldedDiv = foldOpWithConstantIntoOperand(I)) return FoldedDiv; } @@ -1103,7 +1104,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyUDivInst(Op0, Op1, SQ)) + if (Value *V = SimplifyUDivInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1176,7 +1177,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySDivInst(Op0, Op1, SQ)) + if (Value *V = SimplifySDivInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1288,7 +1289,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ)) + if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (isa(Op0)) @@ -1472,7 +1474,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyURemInst(Op0, Op1, SQ)) + if (Value *V = SimplifyURemInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *common = commonIRemTransforms(I)) @@ -1515,7 +1517,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySRemInst(Op0, Op1, SQ)) + if (Value *V = SimplifySRemInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle the integer rem common cases @@ -1588,7 +1590,8 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ)) + if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle cases involving: rem X, (select Cond, Y, Z) diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 1117c11f4f51..5dbf1e85b05b 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -16,9 +16,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/IR/DebugInfo.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, SQ)) + if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN))) return replaceInstUsesWith(PN, V); if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 7afb8814fe52..b9674d85634d 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1121,7 +1121,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); - if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ)) + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, + SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) @@ -1478,9 +1479,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { KnownBits Known(1); computeKnownBits(CondVal, Known, 0, &SI); - if (Known.One == 1) + if (Known.One.isOneValue()) return replaceInstUsesWith(SI, TrueVal); - if (Known.Zero == 1) + if (Known.Zero.isOneValue()) return replaceInstUsesWith(SI, FalseVal); } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index b40d067b2817..3f2ddcacce2b 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -520,8 +520,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), SQ)) + if (Value *V = + SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *V = commonShiftTransforms(I)) @@ -619,7 +620,8 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ)) + if (Value *V = + SimplifyLShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -680,6 +682,25 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); } + if (match(Op0, m_SExt(m_Value(X)))) { + // Are we moving the sign bit to the low bit and widening with high zeros? + unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits(); + if (ShAmt == BitWidth - 1 && + (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { + // lshr (sext i1 X to iN), N-1 --> zext X to iN + if (SrcTyBitWidth == 1) + return new ZExtInst(X, Ty); + + // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN + if (Op0->hasOneUse()) { + Value *NewLShr = Builder->CreateLShr(X, SrcTyBitWidth - 1); + return new ZExtInst(NewLShr, Ty); + } + } + + // TODO: Convert to ashr+zext if the shift equals the extension amount. + } + if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) { unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); // Oversized shifts are simplified to zero in InstSimplify. @@ -703,7 +724,8 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ)) + if (Value *V = + SimplifyAShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 5df55f01b83f..03841164b58d 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -121,7 +121,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } Known.resetAll(); - if (DemandedMask == 0) // Not demanding any bits from V. + if (DemandedMask.isNullValue()) // Not demanding any bits from V. return UndefValue::get(VTy); if (Depth == 6) // Limit search depth. @@ -488,7 +488,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) { + if (DemandedMask.isOneValue()) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( I->getOperand(0), I->getOperand(1), I->getName()); @@ -656,7 +656,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If we don't need any of low bits then return zero, // we know that DemandedMask is non-zero already. APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth); - if (DemandedElts == 0) + if (DemandedElts.isNullValue()) return ConstantInt::getNullValue(VTy); // We know that the upper bits are set to zero. @@ -908,7 +908,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return nullptr; } - if (DemandedElts == 0) { // If nothing is demanded, provide undef. + if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef. UndefElts = EltMask; return UndefValue::get(V->getType()); } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 7fc6774f1849..926e46655eb8 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -145,7 +145,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(), - EI.getIndexOperand(), SQ)) + EI.getIndexOperand(), + SQ.getWithInstruction(&EI))) return replaceInstUsesWith(EI, V); // If vector val is constant with all elements the same, replace EI with @@ -440,7 +441,7 @@ static void replaceExtractElements(InsertElementInst *InsElt, if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); - NewExt->insertAfter(WideVec); + NewExt->insertAfter(OldExt); IC.replaceInstUsesWith(*OldExt, NewExt); } } @@ -1140,8 +1141,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - if (auto *V = - SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ)) + if (auto *V = SimplifyShuffleVectorInst( + LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI))) return replaceInstUsesWith(SVI, V); bool MadeChange = false; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 2730afc5c5b9..65e6d2e35905 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -33,7 +33,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/InstCombine/InstCombine.h" #include "InstCombineInternal.h" #include "llvm-c/Initialization.h" #include "llvm/ADT/SmallPtrSet.h" @@ -62,6 +61,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); @@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); @@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); @@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "B op V". I.setOperand(0, B); I.setOperand(1, V); @@ -521,7 +521,7 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, B, D, SQ); + V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) @@ -540,7 +540,7 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, A, C, SQ); + V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. @@ -638,8 +638,10 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) { + if (Value *L = + SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) + if (Value *R = + SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; C = Builder->CreateBinOp(InnerOpcode, L, R); @@ -655,8 +657,10 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) { + if (Value *L = + SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I))) + if (Value *R = + SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; A = Builder->CreateBinOp(InnerOpcode, L, R); @@ -671,15 +675,17 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (auto *SI1 = dyn_cast(RHS)) { if (SI0->getCondition() == SI1->getCondition()) { Value *SI = nullptr; - if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue(), SQ)) + if (Value *V = + SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue(), SQ.getWithInstruction(&I))) SI = Builder->CreateSelect(SI0->getCondition(), Builder->CreateBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue()), V); - if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), - SI1->getTrueValue(), SQ)) + if (Value *V = + SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), + SI1->getTrueValue(), SQ.getWithInstruction(&I))) SI = Builder->CreateSelect( SI0->getCondition(), V, Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), @@ -1399,7 +1405,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ)) + if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, + SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -1588,7 +1595,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (SO1->getType() != GO1->getType()) return nullptr; - Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ); + Value *Sum = + SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. if (Sum == nullptr) @@ -2283,7 +2291,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); - if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ)) + if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), + SQ.getWithInstruction(&EV))) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast(Agg)) { diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index d4c8369fa9d3..a193efe902cf 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetFolder.h" @@ -25,6 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" using namespace llvm; #define DEBUG_TYPE "bounds-checking" diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index e2e3cbdbc295..a33490f6e4ac 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -44,15 +44,14 @@ /// For more information, please refer to the design document: /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" @@ -63,6 +62,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/SpecialCaseList.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp index e89384c559fe..6864d295525c 100644 --- a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp +++ b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp @@ -18,7 +18,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -32,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 9a82532d7703..f83c930ca61b 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -28,10 +28,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 9260217bd5e6..a991792bf5a3 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,7 +19,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -42,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h index ef075bdccbfe..9c5cf6f5f5ab 100644 --- a/lib/Transforms/ObjCARC/BlotMapVector.h +++ b/lib/Transforms/ObjCARC/BlotMapVector.h @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include #include +#include namespace llvm { /// \brief An associative container with fast insertion-order (deterministic) diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 9d78e5ae3b9b..464805051c65 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -20,8 +20,8 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "llvm/IR/CFG.h" diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index a86eaaec7641..e70e7591f6a7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -26,9 +26,9 @@ // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. -#include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Dominators.h" diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 3c73376c9906..8f3a33f66c7f 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -24,10 +24,10 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "BlotMapVector.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "PtrState.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 9ffdfb4f7f9c..62fc52f6d091 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -22,8 +22,8 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "ProvenanceAnalysis.h" +#include "ObjCARC.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp index c274e8182fb5..870a5f600fd8 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp @@ -8,13 +8,13 @@ //===----------------------------------------------------------------------===// #include "ProvenanceAnalysis.h" -#include "llvm/Pass.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h index 9749e44822b2..87298fa59bfd 100644 --- a/lib/Transforms/ObjCARC/PtrState.h +++ b/lib/Transforms/ObjCARC/PtrState.h @@ -21,8 +21,8 @@ #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace objcarc { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index fd931c521c8f..99480f12da9e 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -19,12 +19,11 @@ #define AA_NAME "alignment-from-assumptions" #define DEBUG_TYPE AA_NAME #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -35,6 +34,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; STATISTIC(NumLoadAlignChanged, diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 9e982194bac7..4fa27891a974 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -18,15 +18,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 07a0ba9b1222..fa4806e884c3 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -19,10 +19,10 @@ #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index 185cdbdda378..063df779a30b 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index b7514a6d5793..29de792bd248 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -41,7 +41,6 @@ // ret void //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -50,6 +49,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp index 5c75f39e381d..8634816e702f 100644 --- a/lib/Transforms/Scalar/GVNSink.cpp +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -169,8 +169,8 @@ struct SinkingInstructionCandidate { NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. - SplitEdgeCost; } - bool operator>=(const SinkingInstructionCandidate &Other) const { - return Cost >= Other.Cost; + bool operator>(const SinkingInstructionCandidate &Other) const { + return Cost > Other.Cost; } }; @@ -745,7 +745,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { std::stable_sort( Candidates.begin(), Candidates.end(), [](const SinkingInstructionCandidate &A, - const SinkingInstructionCandidate &B) { return A >= B; }); + const SinkingInstructionCandidate &B) { return A > B; }); DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C : Candidates) dbgs() << " " << C << "\n";); diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index 65a2cd955672..fb7c6e15758d 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -40,7 +40,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/GuardWidening.h" -#include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/LoopInfo.h" @@ -50,6 +49,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Scalar.h" diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 9a7882211bac..10782963177c 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -86,6 +86,10 @@ static cl::opt UsePostIncrementRanges( cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true)); +static cl::opt +DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), + cl::desc("Disable Linear Function Test Replace optimization")); + namespace { struct RewritePhi; @@ -2413,7 +2417,8 @@ bool IndVarSimplify::run(Loop *L) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) { + if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) && + needsLFTR(L, DT)) { PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index e21b0feb7c5a..2f96c3064b86 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -59,8 +59,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -1371,28 +1371,35 @@ bool LoopConstrainer::run() { DT.recalculate(F); + // We need to first add all the pre and post loop blocks into the loop + // structures (as part of createClonedLoopStructure), and then update the + // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating + // LI when LoopSimplifyForm is generated. + Loop *PreL = nullptr, *PostL = nullptr; if (!PreLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PreL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map); - formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Pre loops are slow paths, we do not need to perform any loop - // optimizations on them. - DisableAllLoopOptsOnLoop(*L); } if (!PostLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PostL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map); - formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Post loops are slow paths, we do not need to perform any loop - // optimizations on them. - DisableAllLoopOptsOnLoop(*L); } - formLCSSARecursively(OriginalLoop, DT, &LI, &SE); - simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true); + // This function canonicalizes the loop into Loop-Simplify and LCSSA forms. + auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) { + formLCSSARecursively(*L, DT, &LI, &SE); + simplifyLoop(L, &DT, &LI, &SE, nullptr, true); + // Pre/post loops are slow paths, we do not need to perform any loop + // optimizations on them. + if (!IsOriginalLoop) + DisableAllLoopOptsOnLoop(*L); + }; + if (PreL) + CanonicalizeLoop(PreL, false); + if (PostL) + CanonicalizeLoop(PostL, false); + CanonicalizeLoop(&OriginalLoop, true); return true; } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 5e116ef2fe75..3c8fbd35bf8c 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -89,7 +89,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" @@ -100,6 +99,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -500,6 +500,7 @@ static Value *cloneConstantExprWithNewAddressSpace( } // Computes the operands of the new constant expression. + bool IsNew = false; SmallVector NewOperands; for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) { Constant *Operand = CE->getOperand(Index); @@ -509,6 +510,7 @@ static Value *cloneConstantExprWithNewAddressSpace( // bitcast, and getelementptr) do not incur cycles in the data flow graph // and (2) this function is called on constant expressions in postorder. if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) { + IsNew = true; NewOperands.push_back(cast(NewOperand)); } else { // Otherwise, reuses the old operand. @@ -516,6 +518,11 @@ static Value *cloneConstantExprWithNewAddressSpace( } } + // If !IsNew, we will replace the Value with itself. However, replaced values + // are assumed to wrapped in a addrspace cast later so drop it now. + if (!IsNew) + return nullptr; + if (CE->getOpcode() == Instruction::GetElementPtr) { // Needs to specify the source type while constructing a getelementptr // constant expression. diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 2ef8f8563bb9..c120036464d0 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -12,16 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/JumpThreading.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" @@ -36,6 +35,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -132,7 +132,7 @@ bool JumpThreading::runOnFunction(Function &F) { bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); + BPI.reset(new BranchProbabilityInfo(F, LI, TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } @@ -152,7 +152,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); + BPI.reset(new BranchProbabilityInfo(F, LI, &TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 494cbc61bc9c..025ba1bfedc1 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -11,7 +11,6 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -28,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index c6a05ecbd0b1..b706152f30c8 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -116,6 +116,7 @@ private: Memset, MemsetPattern, Memcpy, + UnorderedAtomicMemcpy, DontUse // Dummy retval never to be used. Allows catching errors in retval // handling. }; @@ -353,8 +354,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { LoopIdiomRecognize::LegalStoreKind LoopIdiomRecognize::isLegalStore(StoreInst *SI) { + // Don't touch volatile stores. - if (!SI->isSimple()) + if (SI->isVolatile()) + return LegalStoreKind::None; + // We only want simple or unordered-atomic stores. + if (!SI->isUnordered()) return LegalStoreKind::None; // Don't convert stores of non-integral pointer types to memsets (which stores @@ -395,15 +400,18 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; + // Note: memset and memset_pattern on unordered-atomic is yet not supported + bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple(); + // If we're allowed to form a memset, and the stored value would be // acceptable for memset, use it. - if (HasMemset && SplatValue && + if (!UnorderedAtomic && HasMemset && SplatValue && // Verify that the stored value is loop invariant. If not, we can't // promote the memset. CurLoop->isLoopInvariant(SplatValue)) { // It looks like we can use SplatValue. return LegalStoreKind::Memset; - } else if (HasMemsetPattern && + } else if (!UnorderedAtomic && HasMemsetPattern && // Don't create memset_pattern16s with address spaces. StorePtr->getType()->getPointerAddressSpace() == 0 && (PatternValue = getMemSetPatternValue(StoredVal, DL))) { @@ -422,7 +430,12 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // The store must be feeding a non-volatile load. LoadInst *LI = dyn_cast(SI->getValueOperand()); - if (!LI || !LI->isSimple()) + + // Only allow non-volatile loads + if (!LI || LI->isVolatile()) + return LegalStoreKind::None; + // Only allow simple or unordered-atomic loads + if (!LI->isUnordered()) return LegalStoreKind::None; // See if the pointer expression is an AddRec like {base,+,1} on the current @@ -438,7 +451,9 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { return LegalStoreKind::None; // Success. This store can be converted into a memcpy. - return LegalStoreKind::Memcpy; + UnorderedAtomic = UnorderedAtomic || LI->isAtomic(); + return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy + : LegalStoreKind::Memcpy; } // This store can't be transformed into a memset/memcpy. return LegalStoreKind::None; @@ -469,6 +484,7 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) { StoreRefsForMemsetPattern[Ptr].push_back(SI); } break; case LegalStoreKind::Memcpy: + case LegalStoreKind::UnorderedAtomicMemcpy: StoreRefsForMemcpy.push_back(SI); break; default: @@ -882,7 +898,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// for (i) A[i] = B[i]; bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount) { - assert(SI->isSimple() && "Expected only non-volatile stores."); + assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores."); Value *StorePtr = SI->getPointerOperand(); const SCEVAddRecExpr *StoreEv = cast(SE->getSCEV(StorePtr)); @@ -892,7 +908,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // The store must be feeding a non-volatile load. LoadInst *LI = cast(SI->getValueOperand()); - assert(LI->isSimple() && "Expected only non-volatile stores."); + assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads."); // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a @@ -966,16 +982,47 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); - if (StoreSize != 1) - NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), - SCEV::FlagNUW); - Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); + unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); + CallInst *NewCall = nullptr; + // Check whether to generate an unordered atomic memcpy: + // If the load or store are atomic, then they must neccessarily be unordered + // by previous checks. + if (!SI->isAtomic() && !LI->isAtomic()) { + if (StoreSize != 1) + NumBytesS = SE->getMulExpr( + NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW); - CallInst *NewCall = - Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, - std::min(SI->getAlignment(), LI->getAlignment())); + Value *NumBytes = + Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); + + NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align); + } else { + // We cannot allow unaligned ops for unordered load/store, so reject + // anything where the alignment isn't at least the element size. + if (Align < StoreSize) + return false; + + // If the element.atomic memcpy is not lowered into explicit + // loads/stores later, then it will be lowered into an element-size + // specific lib call. If the lib call doesn't exist for our store size, then + // we shouldn't generate the memcpy. + if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) + return false; + + Value *NumElements = + Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); + + NewCall = Builder.CreateElementAtomicMemCpy(StoreBasePtr, LoadBasePtr, + NumElements, StoreSize); + // Propagate alignment info onto the pointer args. Note that unordered + // atomic loads/stores are *required* by the spec to have an alignment + // but non-atomic loads/stores may not. + NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(), + SI->getAlignment())); + NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(), + LI->getAlignment())); + } NewCall->setDebugLoc(SI->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 32fd3da465fe..9b12ba180444 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -37,7 +37,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" -#include "llvm/Pass.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -48,6 +47,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopUtils.h" diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index fd15a9014def..fc0216e76a5b 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -11,10 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -31,6 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 28d94497a3ef..b027278b24f2 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -131,7 +131,7 @@ static cl::opt EnablePhiElim( // The flag adds instruction count to solutions cost comparision. static cl::opt InsnsCost( - "lsr-insns-cost", cl::Hidden, cl::init(false), + "lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model")); // Flag to choose how to narrow complex lsr solution @@ -950,39 +950,37 @@ namespace { /// This class is used to measure and compare candidate formulae. class Cost { - /// TODO: Some of these could be merged. Also, a lexical ordering - /// isn't always optimal. - unsigned Insns; - unsigned NumRegs; - unsigned AddRecCost; - unsigned NumIVMuls; - unsigned NumBaseAdds; - unsigned ImmCost; - unsigned SetupCost; - unsigned ScaleCost; + TargetTransformInfo::LSRCost C; public: - Cost() - : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), - ImmCost(0), SetupCost(0), ScaleCost(0) {} + Cost() { + C.Insns = 0; + C.NumRegs = 0; + C.AddRecCost = 0; + C.NumIVMuls = 0; + C.NumBaseAdds = 0; + C.ImmCost = 0; + C.SetupCost = 0; + C.ScaleCost = 0; + } - bool operator<(const Cost &Other) const; + bool isLess(Cost &Other, const TargetTransformInfo &TTI); void Lose(); #ifndef NDEBUG // Once any of the metrics loses, they must all remain losers. bool isValid() { - return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds - | ImmCost | SetupCost | ScaleCost) != ~0u) - || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds - & ImmCost & SetupCost & ScaleCost) == ~0u); + return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds + | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) + || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds + & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u); } #endif bool isLoser() { assert(isValid() && "invalid cost"); - return NumRegs == ~0u; + return C.NumRegs == ~0u; } void RateFormula(const TargetTransformInfo &TTI, @@ -1170,10 +1168,10 @@ void Cost::RateRegister(const SCEV *Reg, } // Otherwise, it will be an invariant with respect to Loop L. - ++NumRegs; + ++C.NumRegs; return; } - AddRecCost += 1; /// TODO: This should be a function of the stride. + C.AddRecCost += 1; /// TODO: This should be a function of the stride. // Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. @@ -1185,7 +1183,7 @@ void Cost::RateRegister(const SCEV *Reg, } } } - ++NumRegs; + ++C.NumRegs; // Rough heuristic; favor registers which don't require extra setup // instructions in the preheader. @@ -1194,9 +1192,9 @@ void Cost::RateRegister(const SCEV *Reg, !(isa(Reg) && (isa(cast(Reg)->getStart()) || isa(cast(Reg)->getStart())))) - ++SetupCost; + ++C.SetupCost; - NumIVMuls += isa(Reg) && + C.NumIVMuls += isa(Reg) && SE.hasComputableLoopEvolution(Reg, L); } @@ -1229,9 +1227,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, SmallPtrSetImpl *LoserRegs) { assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); // Tally up the registers. - unsigned PrevAddRecCost = AddRecCost; - unsigned PrevNumRegs = NumRegs; - unsigned PrevNumBaseAdds = NumBaseAdds; + unsigned PrevAddRecCost = C.AddRecCost; + unsigned PrevNumRegs = C.NumRegs; + unsigned PrevNumBaseAdds = C.NumBaseAdds; if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { Lose(); @@ -1251,45 +1249,51 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, return; } - // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as - // additional instruction (at least fill). - unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; - if (NumRegs > TTIRegNum) { - // Cost already exceeded TTIRegNum, then only newly added register can add - // new instructions. - if (PrevNumRegs > TTIRegNum) - Insns += (NumRegs - PrevNumRegs); - else - Insns += (NumRegs - TTIRegNum); - } - // Determine how many (unfolded) adds we'll need inside the loop. size_t NumBaseParts = F.getNumRegs(); if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += + C.NumBaseAdds += NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); - NumBaseAdds += (F.UnfoldedOffset != 0); + C.NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. - ScaleCost += getScalingFactorCost(TTI, LU, F, *L); + C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L); // Tally up the non-zero immediates. for (const LSRFixup &Fixup : LU.Fixups) { int64_t O = Fixup.Offset; int64_t Offset = (uint64_t)O + F.BaseOffset; if (F.BaseGV) - ImmCost += 64; // Handle symbolic values conservatively. + C.ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) - ImmCost += APInt(64, Offset, true).getMinSignedBits(); + C.ImmCost += APInt(64, Offset, true).getMinSignedBits(); // Check with target if this offset with this instruction is // specifically not supported. if ((isa(Fixup.UserInst) || isa(Fixup.UserInst)) && !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset)) - NumBaseAdds++; + C.NumBaseAdds++; + } + + // If we don't count instruction cost exit here. + if (!InsnsCost) { + assert(isValid() && "invalid cost"); + return; + } + + // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as + // additional instruction (at least fill). + unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; + if (C.NumRegs > TTIRegNum) { + // Cost already exceeded TTIRegNum, then only newly added register can add + // new instructions. + if (PrevNumRegs > TTIRegNum) + C.Insns += (C.NumRegs - PrevNumRegs); + else + C.Insns += (C.NumRegs - TTIRegNum); } // If ICmpZero formula ends with not 0, it could not be replaced by @@ -1302,55 +1306,54 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, // For {-10, +, 1}: // i = i + 1; if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd()) - Insns++; + C.Insns++; // Each new AddRec adds 1 instruction to calculation. - Insns += (AddRecCost - PrevAddRecCost); + C.Insns += (C.AddRecCost - PrevAddRecCost); // BaseAdds adds instructions for unfolded registers. if (LU.Kind != LSRUse::ICmpZero) - Insns += NumBaseAdds - PrevNumBaseAdds; + C.Insns += C.NumBaseAdds - PrevNumBaseAdds; assert(isValid() && "invalid cost"); } /// Set this cost to a losing value. void Cost::Lose() { - Insns = ~0u; - NumRegs = ~0u; - AddRecCost = ~0u; - NumIVMuls = ~0u; - NumBaseAdds = ~0u; - ImmCost = ~0u; - SetupCost = ~0u; - ScaleCost = ~0u; + C.Insns = ~0u; + C.NumRegs = ~0u; + C.AddRecCost = ~0u; + C.NumIVMuls = ~0u; + C.NumBaseAdds = ~0u; + C.ImmCost = ~0u; + C.SetupCost = ~0u; + C.ScaleCost = ~0u; } /// Choose the lower cost. -bool Cost::operator<(const Cost &Other) const { - if (InsnsCost && Insns != Other.Insns) - return Insns < Other.Insns; - return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost, - ImmCost, SetupCost) < - std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls, - Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost, - Other.SetupCost); +bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) { + if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && + C.Insns != Other.C.Insns) + return C.Insns < Other.C.Insns; + return TTI.isLSRCostLess(C, Other.C); } void Cost::print(raw_ostream &OS) const { - OS << Insns << " instruction" << (Insns == 1 ? " " : "s "); - OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s"); - if (AddRecCost != 0) - OS << ", with addrec cost " << AddRecCost; - if (NumIVMuls != 0) - OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s"); - if (NumBaseAdds != 0) - OS << ", plus " << NumBaseAdds << " base add" - << (NumBaseAdds == 1 ? "" : "s"); - if (ScaleCost != 0) - OS << ", plus " << ScaleCost << " scale cost"; - if (ImmCost != 0) - OS << ", plus " << ImmCost << " imm cost"; - if (SetupCost != 0) - OS << ", plus " << SetupCost << " setup cost"; + if (InsnsCost) + OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); + OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); + if (C.AddRecCost != 0) + OS << ", with addrec cost " << C.AddRecCost; + if (C.NumIVMuls != 0) + OS << ", plus " << C.NumIVMuls << " IV mul" + << (C.NumIVMuls == 1 ? "" : "s"); + if (C.NumBaseAdds != 0) + OS << ", plus " << C.NumBaseAdds << " base add" + << (C.NumBaseAdds == 1 ? "" : "s"); + if (C.ScaleCost != 0) + OS << ", plus " << C.ScaleCost << " scale cost"; + if (C.ImmCost != 0) + OS << ", plus " << C.ImmCost << " imm cost"; + if (C.SetupCost != 0) + OS << ", plus " << C.SetupCost << " setup cost"; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -4105,7 +4108,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { Cost CostBest; Regs.clear(); CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU); - if (CostF < CostBest) + if (CostF.isLess(CostBest, TTI)) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" @@ -4573,7 +4576,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, NewCost = CurCost; NewRegs = CurRegs; NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU); - if (NewCost < SolutionCost) { + if (NewCost.isLess(SolutionCost, TTI)) { Workspace.push_back(&F); if (Workspace.size() != Uses.size()) { SolveRecurse(Solution, SolutionCost, Workspace, NewCost, diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 19daebd0613a..d0c96fa627a4 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -26,34 +26,34 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 7d8da9b453f9..46f8a3564265 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -93,7 +93,9 @@ static bool handleSwitchExpect(SwitchInst &SI) { /// the branch probability info for the originating branch can be inferred. static void handlePhiDef(CallInst *Expect) { Value &Arg = *Expect->getArgOperand(0); - ConstantInt *ExpectedValue = cast(Expect->getArgOperand(1)); + ConstantInt *ExpectedValue = dyn_cast(Expect->getArgOperand(1)); + if (!ExpectedValue) + return; const APInt &ExpectedPhiValue = ExpectedValue->getValue(); // Walk up in backward a list of instructions that diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp index 4f413715ffe6..070114a84cc5 100644 --- a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp @@ -17,10 +17,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 21a632073da7..7896396f0898 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -12,11 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" @@ -31,12 +32,12 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -49,7 +50,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/Transforms/Utils/Local.h" #include #include diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 27809f5b6f66..6926aae37963 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -378,6 +378,15 @@ private: }; namespace llvm { +struct ExactEqualsExpression { + const Expression &E; + explicit ExactEqualsExpression(const Expression &E) : E(E) {} + hash_code getComputedHash() const { return E.getComputedHash(); } + bool operator==(const Expression &Other) const { + return E.exactlyEquals(Other); + } +}; + template <> struct DenseMapInfo { static const Expression *getEmptyKey() { auto Val = static_cast(-1); @@ -390,8 +399,17 @@ template <> struct DenseMapInfo { return reinterpret_cast(Val); } static unsigned getHashValue(const Expression *E) { - return static_cast(E->getComputedHash()); + return E->getComputedHash(); } + static unsigned getHashValue(const ExactEqualsExpression &E) { + return E.getComputedHash(); + } + static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) { + if (RHS == getTombstoneKey() || RHS == getEmptyKey()) + return false; + return LHS == *RHS; + } + static bool isEqual(const Expression *LHS, const Expression *RHS) { if (LHS == RHS) return true; @@ -848,6 +866,8 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, // Things in TOPClass are equivalent to everything. if (ValueToClass.lookup(*U) == TOPClass) return false; + if (lookupOperandLeader(*U) == PN) + return false; return true; }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), @@ -1571,30 +1591,6 @@ bool NewGVN::isCycleFree(const Instruction *I) const { // Evaluate PHI nodes symbolically, and create an expression result. const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { - // Resolve irreducible and reducible phi cycles. - // FIXME: This is hopefully a temporary solution while we resolve the issues - // with fixpointing self-cycles. It currently should be "guaranteed" to be - // correct, but non-optimal. The SCCFinder does not, for example, take - // reachability of arguments into account, etc. - SCCFinder.Start(I); - bool CanOptimize = true; - SmallPtrSet OuterOps; - - auto &Component = SCCFinder.getComponentFor(I); - for (auto *Member : Component) { - if (!isa(Member)) { - CanOptimize = false; - break; - } - for (auto &PHIOp : cast(Member)->operands()) - if (!isa(PHIOp) || !Component.count(cast(PHIOp))) - OuterOps.insert(PHIOp); - } - if (CanOptimize && OuterOps.size() == 1) { - DEBUG(dbgs() << "Resolving cyclic phi to value " << *(*OuterOps.begin()) - << "\n"); - return createVariableOrConstant(*OuterOps.begin()); - } // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands @@ -1662,7 +1658,12 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { if (!someEquivalentDominates(AllSameInst, I)) return E; } - + // Can't simplify to something that comes later in the iteration. + // Otherwise, when and if it changes congruence class, we will never catch + // up. We will always be a class behind it. + if (isa(AllSameValue) && + InstrToDFSNum(AllSameValue) > InstrToDFSNum(I)) + return E; NumGVNPhisAllSame++; DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue << "\n"); @@ -2158,7 +2159,17 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (OldClass->getDefiningExpr()) { DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr() << " from table\n"); - ExpressionToClass.erase(OldClass->getDefiningExpr()); + // We erase it as an exact expression to make sure we don't just erase an + // equivalent one. + auto Iter = ExpressionToClass.find_as( + ExactEqualsExpression(*OldClass->getDefiningExpr())); + if (Iter != ExpressionToClass.end()) + ExpressionToClass.erase(Iter); +#ifdef EXPENSIVE_CHECKS + assert( + (*OldClass->getDefiningExpr() != *E || ExpressionToClass.lookup(E)) && + "We erased the expression we just inserted, which should not happen"); +#endif } } else if (OldClass->getLeader() == I) { // When the leader changes, the value numbering of @@ -2272,8 +2283,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { auto *OldE = ValueToExpression.lookup(I); // It could just be that the old class died. We don't want to erase it if we // just moved classes. - if (OldE && isa(OldE) && *E != *OldE) - ExpressionToClass.erase(OldE); + if (OldE && isa(OldE) && *E != *OldE) { + // Erase this as an exact expression to ensure we don't erase expressions + // equivalent to it. + auto Iter = ExpressionToClass.find_as(ExactEqualsExpression(*OldE)); + if (Iter != ExpressionToClass.end()) + ExpressionToClass.erase(Iter); + } } ValueToExpression[I] = E; } @@ -3060,6 +3076,9 @@ void NewGVN::iterateTouchedInstructions() { } updateProcessedCount(CurrBlock); } + // Reset after processing (because we may mark ourselves as touched when + // we propagate equalities). + TouchedInstructions.reset(InstrNum); if (auto *MP = dyn_cast(V)) { DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n"); @@ -3070,9 +3089,6 @@ void NewGVN::iterateTouchedInstructions() { llvm_unreachable("Should have been a MemoryPhi or Instruction"); } updateProcessedCount(V); - // Reset after processing (because we may mark ourselves as touched when - // we propagate equalities). - TouchedInstructions.reset(InstrNum); } } NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 615029dd161b..96295683314c 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -25,6 +24,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 350b50ffcdd4..bae7911d222c 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -12,15 +12,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/MapVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -28,15 +27,16 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 1d0e8396f6a2..815492ac354c 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1117,7 +1117,7 @@ CallOverdefined: // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. if (F && F->isDeclaration() && !I->getType()->isStructTy() && - canConstantFoldCallTo(F)) { + canConstantFoldCallTo(CS, F)) { SmallVector Operands; for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end(); @@ -1137,7 +1137,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands, TLI)) { + if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) { // call -> undef. if (isa(C)) return; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index fb1b5813fd79..1527f15f18a3 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -3626,10 +3626,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { auto *PartPtrTy = PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); @@ -3707,9 +3709,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PLoad = (*SplitLoads)[Idx]; } else { IRB.SetInsertPoint(LI); + auto AS = LI->getPointerAddressSpace(); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), + APInt(DL.getPointerSizeInBits(AS), PartOffset), LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3717,10 +3720,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // And store this partition. IRB.SetInsertPoint(SI); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - StorePartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 9fa43da99da9..850a01114eeb 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -20,12 +20,12 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index c0c09a7e43fe..d11855f2f3a9 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index cde659b9d189..84675f41cdd5 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -156,27 +156,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 0f170e26ce5f..aaab5857e0f1 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopAnalysisManager.h" @@ -37,7 +38,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include #include #include diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 102e9eaeab77..5210f165b874 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -114,7 +114,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo, if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!isSafeToSpeculativelyExecute(Inst)) + if (isa(Inst)) return false; // We don't want to sink across a critical edge if we don't dominate the diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 49ce0262c97b..486f3e5a43d4 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SCCIterator.h" @@ -20,6 +19,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index bf54a51c7635..3e5993618c4c 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -51,13 +51,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/TailRecursionElimination.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -76,6 +75,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 7a21c03da221..83bc05d0311c 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -37,6 +37,7 @@ add_llvm_library(LLVMTransformUtils MetaRenamer.cpp ModuleUtils.cpp NameAnonGlobals.cpp + OrderedInstructions.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1c1a75c111e9..314c990293cc 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" @@ -31,6 +30,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 5444b752de82..d27cb45c7d7f 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-c/Core.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm-c/Core.h" using namespace llvm; static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 0eee6e19efac..6d3d287defdb 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 59f176e2f231..c97e544e620a 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -439,7 +439,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { InstResult = C; DEBUG(dbgs() << "Constant folded function call. Result: " << *InstResult << "\n"); diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 7b96fbb11a14..435eff3bef47 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +18,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "flattencfg" diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 57468be9a2a8..0457294361b5 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -15,8 +15,8 @@ #include "llvm/Transforms/Utils/FunctionComparator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index b00f4b14068a..a98d07237b47 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" using namespace llvm; diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index ba4b78ac758a..245fefb38ee8 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -18,7 +19,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0ca9f4c484e6..2a18c140c788 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -28,13 +27,13 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -43,6 +42,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 53b432fcafd4..23ec45edb3ef 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; namespace { diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index ebd528bc8ec1..2af671636cbd 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -22,8 +22,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 72c06aef8037..f3db278ef1e4 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -38,15 +38,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -65,6 +64,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 4ab4d7949d23..f2527f89e83e 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -39,6 +38,7 @@ #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; #define DEBUG_TYPE "loop-unroll" diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 391fde3b0b01..a920cd86a26a 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" @@ -37,6 +36,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" #include using namespace llvm; diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 81f033e7d51a..412f6129407e 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -15,14 +15,13 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index c7cb561b5e21..0a51f9a0e4a2 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 8959e77438e9..890afbc46e63 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -24,6 +23,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 481c6aa29c3a..9f2ad540c83d 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -23,6 +22,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { diff --git a/lib/Transforms/Utils/OrderedInstructions.cpp b/lib/Transforms/Utils/OrderedInstructions.cpp new file mode 100644 index 000000000000..2e67e0def5b9 --- /dev/null +++ b/lib/Transforms/Utils/OrderedInstructions.cpp @@ -0,0 +1,33 @@ +//===-- OrderedInstructions.cpp - Instruction dominance function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utility to check dominance relation of 2 instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/OrderedInstructions.h" +using namespace llvm; + +/// Given 2 instructions, use OrderedBasicBlock to check for dominance relation +/// if the instructions are in the same basic block, Otherwise, use dominator +/// tree. +bool OrderedInstructions::dominates(const Instruction *InstA, + const Instruction *InstB) const { + const BasicBlock *IBB = InstA->getParent(); + // Use ordered basic block to do dominance check in case the 2 instructions + // are in the same basic block. + if (IBB == InstB->getParent()) { + auto OBB = OBBMap.find(IBB); + if (OBB == OBBMap.end()) + OBB = OBBMap.insert({IBB, make_unique(IBB)}).first; + return OBB->second->dominates(InstA, InstB); + } else { + return DT->dominates(InstA->getParent(), InstB->getParent()); + } +} diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 8b6a2c3766d2..6ccf54e49dd3 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -30,7 +31,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" #include #include diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp index 9afd175c10ed..8c23957ac43e 100644 --- a/lib/Transforms/Utils/SanitizerStats.cpp +++ b/lib/Transforms/Utils/SanitizerStats.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SanitizerStats.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 1b442a9a264d..0970c436e665 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -15,13 +15,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" @@ -29,8 +29,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -55,7 +55,6 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 2509b5f22046..2ea15f65cef9 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -27,8 +27,8 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instsimplify" diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index cc6c47e8f978..b723b65f35e5 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -738,8 +738,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); if (!LenC) return nullptr; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); diff --git a/lib/Transforms/Utils/StripGCRelocates.cpp b/lib/Transforms/Utils/StripGCRelocates.cpp index f3d3fadb51e9..49dc15cf5e7c 100644 --- a/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/lib/Transforms/Utils/StripGCRelocates.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; diff --git a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index 66dbf335cb95..cd0378e0140c 100644 --- a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 6d136636ce70..20107553665f 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -59,9 +59,9 @@ #define DEBUG_TYPE "symbol-rewriter" #include "llvm/Transforms/Utils/SymbolRewriter.h" -#include "llvm/Pass.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index 7106483c3bd2..f6c7d1c4989e 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" #include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index c83b3f7b225b..78453aaa16ce 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #define BBV_NAME "bb-vectorize" -#include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -50,6 +49,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Vectorize.h" #include using namespace llvm; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 799eef21dc4e..1abdb2484850 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -485,8 +485,7 @@ protected: /// of scalars. If \p IfPredicateInstr is true we need to 'hide' each /// scalarized instruction behind an if block predicated on the control /// dependence of the instruction. - virtual void scalarizeInstruction(Instruction *Instr, - bool IfPredicateInstr = false); + void scalarizeInstruction(Instruction *Instr, bool IfPredicateInstr = false); /// Vectorize Load and Store instructions, virtual void vectorizeMemoryInstruction(Instruction *Instr); @@ -3815,7 +3814,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { if (auto *BO = dyn_cast(I)) { NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)), ShrinkOperand(BO->getOperand(1))); - cast(NewI)->copyIRFlags(I); + + // Any wrapping introduced by shrinking this operation shouldn't be + // considered undefined behavior. So, we can't unconditionally copy + // arithmetic wrapping flags to NewI. + cast(NewI)->copyIRFlags(I, /*IncludeWrapFlags=*/false); } else if (auto *CI = dyn_cast(I)) { NewI = B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)), diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index 28e0b2eb9866..a21928317888 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -17,9 +17,9 @@ #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/Vectorize.h" #include "llvm/Analysis/Passes.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" using namespace llvm; diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp index 431c251feb65..d9ce255bc688 100644 --- a/lib/XRay/InstrumentationMap.cpp +++ b/lib/XRay/InstrumentationMap.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/XRay/InstrumentationMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -22,7 +23,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm/XRay/InstrumentationMap.h" #include #include #include diff --git a/test/Analysis/BranchProbabilityInfo/libfunc_call.ll b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll new file mode 100644 index 000000000000..13bc0de90a61 --- /dev/null +++ b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll @@ -0,0 +1,264 @@ +; RUN: opt < %s -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -analyze -lazy-branch-prob | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +declare i32 @strcmp(i8*, i8*) +declare i32 @strncmp(i8*, i8*, i32) +declare i32 @strcasecmp(i8*, i8*) +declare i32 @strncasecmp(i8*, i8*, i32) +declare i32 @memcmp(i8*, i8*) +declare i32 @nonstrcmp(i8*, i8*) + + +; Check that the result of strcmp is considered more likely to be nonzero than +; zero, and equally likely to be (nonzero) positive or negative. + +define i32 @test_strcmp_eq(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_eq' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp eq i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcmp_ne(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_ne' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp ne i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_sgt' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcmp_slt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_slt' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp slt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + + +; Similarly check other library functions that have the same behaviour + +define i32 @test_strncmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strncmp_sgt' +entry: + %val = call i32 @strncmp(i8* %p, i8* %q, i32 4) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcasecmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcasecmp_sgt' +entry: + %val = call i32 @strcasecmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strncasecmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strncasecmp_sgt' +entry: + %val = call i32 @strncasecmp(i8* %p, i8* %q, i32 4) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_memcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_memcmp_sgt' +entry: + %val = call i32 @memcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + + +; Check that for the result of a call to a non-library function the default +; heuristic is applied, i.e. positive more likely than negative, nonzero more +; likely than zero. + +define i32 @test_nonstrcmp_eq(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_eq' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp eq i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_nonstrcmp_ne(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_ne' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp ne i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_nonstrcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_sgt' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} diff --git a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll index 50ad61a8f100..16bc8a983e48 100644 --- a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll +++ b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll @@ -44,7 +44,7 @@ entry: %9 = add i32 %f.promoted, %smax %10 = add i32 %9, 2 call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i32 1, i1 false) -; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i64 5, i64 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false) +; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false) ; CHECK-NOT: call void @llvm.memset.p0i8.i32(i8* getelementptr ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i64 1, i64 4, i64 4, i32 1) ret i32 0 } diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll index 00ab21e46d5d..3a0ab0f03b99 100644 --- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll +++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll @@ -10,17 +10,23 @@ define i8 @test1(i32 %a, i32 %length) { ; CHECK-LABEL: LVI for function 'test1': entry: +; CHECK-LABEL: entry: +; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined +; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined br label %loop -; CHECK-LABEL: backedge: -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' -; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, 400>' -; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv.next = add nsw i32 %iv, 1' -; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, 401>' -; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 -; CHECK-NEXT: %cont = icmp slt i32 %iv.next, 400 -; CHECK-NEXT: br i1 %cont, label %backedge, label %exit +; CHECK-LABEL: backedge: +; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined +; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, 400> +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<399, 400> +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, 401> +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%exit' is: constantrange<400, 401> +; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 +; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%exit' is: constantrange<0, -1> +; CHECK-NEXT: %cont = icmp slt i32 %iv.next, 400 ; CHECK-NOT: loop loop: %iv = phi i32 [0, %entry], [%iv.next, %backedge] @@ -36,46 +42,58 @@ exit: ret i8 0 } - ; Here JT does not transform the code, but LVICache is populated during the processing of blocks. define i8 @test2(i32 %n) { ; CHECK-LABEL: LVI for function 'test2': ; CHECK-LABEL: entry: -; CHECK-LABEL: ; OverDefined values for block are: -; CHECK-NEXT: ;i32 %n +; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined ; CHECK-NEXT: br label %loop entry: br label %loop ; CHECK-LABEL: loop: -; CHECK-LABEL: ; OverDefined values for block are: -; CHECK-NEXT: ; %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ] -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' -; CHECK-DAG: ; at beginning of BasicBlock: '%loop' LatticeVal: 'constantrange<0, -2147483647>' -; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, -2147483648>' -; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] -; CHECK: %cnd = and i1 %cnd1, %cnd2 -; CHECK: br i1 %cnd, label %backedge, label %exit +; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%loop' is: constantrange<0, -2147483647> +; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, -2147483648> +; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<0, -2147483647> +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] loop: %iv = phi i32 [0, %entry], [%iv.next, %backedge] +; CHECK-NEXT: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%loop' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%backedge' is: constantrange<1, -2147483648> +; CHECK-DAG: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%exit' is: overdefined +; CHECK-NEXT: %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ] %iv2 = phi i32 [%n, %entry], [%iv2.next, %backedge] + +; CHECK-NEXT: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%loop' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%backedge' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%exit' is: overdefined +; CHECK-NEXT: %cnd1 = icmp sge i32 %iv, 0 %cnd1 = icmp sge i32 %iv, 0 %cnd2 = icmp sgt i32 %iv2, 0 +; CHECK: %cnd2 = icmp sgt i32 %iv2, 0 +; CHECK: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%loop' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%backedge' is: constantrange<-1, 0> +; CHECK-DAG: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%exit' is: overdefined +; CHECK-NEXT: %cnd = and i1 %cnd1, %cnd2 %cnd = and i1 %cnd1, %cnd2 br i1 %cnd, label %backedge, label %exit ; CHECK-LABEL: backedge: -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv.next = add nsw i32 %iv, 1' -; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, -2147483647>' -; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 -; CHECK-NEXT: %iv2.next = sub nsw i32 %iv2, 1 -; CHECK: %cont = and i1 %cont1, %cont2 -; CHECK: br i1 %cont, label %loop, label %exit +; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, -2147483647> +; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 backedge: %iv.next = add nsw i32 %iv, 1 %iv2.next = sub nsw i32 %iv2, 1 +; CHECK: ; LatticeVal for: ' %cont1 = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined +; CHECK-NEXT: %cont1 = icmp slt i32 %iv.next, 400 %cont1 = icmp slt i32 %iv.next, 400 +; CHECK-NEXT: ; LatticeVal for: ' %cont2 = icmp sgt i32 %iv2.next, 0' in BB: '%backedge' is: overdefined +; CHECK-NEXT: %cont2 = icmp sgt i32 %iv2.next, 0 %cont2 = icmp sgt i32 %iv2.next, 0 +; CHECK-NEXT: ; LatticeVal for: ' %cont = and i1 %cont1, %cont2' in BB: '%backedge' is: overdefined +; CHECK-NEXT: %cont = and i1 %cont1, %cont2 %cont = and i1 %cont1, %cont2 br i1 %cont, label %loop, label %exit diff --git a/test/Bindings/OCaml/core.ml b/test/Bindings/OCaml/core.ml index 105f1bc4f732..802baa0b80b2 100644 --- a/test/Bindings/OCaml/core.ml +++ b/test/Bindings/OCaml/core.ml @@ -66,6 +66,16 @@ let suite name f = let filename = Sys.argv.(1) let m = create_module context filename +(*===-- Contained types --------------------------------------------------===*) + +let test_contained_types () = + let pointer_i32 = pointer_type i32_type in + insist (i32_type = (Array.get (subtypes pointer_i32) 0)); + + let ar = struct_type context [| i32_type; i8_type |] in + insist (i32_type = (Array.get (subtypes ar)) 0); + insist (i8_type = (Array.get (subtypes ar)) 1) + (*===-- Conversion --------------------------------------------------------===*) @@ -1533,6 +1543,7 @@ let test_writer () = (*===-- Driver ------------------------------------------------------------===*) let _ = + suite "contained types" test_contained_types; suite "conversion" test_conversion; suite "target" test_target; suite "constants" test_constants; diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll index 53ffef900b57..184f72e9856e 100644 --- a/test/Bitcode/ptest-old.ll +++ b/test/Bitcode/ptest-old.ll @@ -1,6 +1,6 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s ; RUN: verify-uselistorder < %s -; REQUIRES: x86 +; REQUIRES: x86-registered-target define i32 @foo(<4 x float> %bar) nounwind { entry: diff --git a/test/BugPoint/unsymbolized.ll b/test/BugPoint/unsymbolized.ll new file mode 100644 index 000000000000..8547f220ea26 --- /dev/null +++ b/test/BugPoint/unsymbolized.ll @@ -0,0 +1,21 @@ +; REQUIRES: loadable_module +; RUN: echo "import sys" > %t.py +; RUN: echo "print('args = ' + str(sys.argv))" >> %t.py +; RUN: echo "exit(1)" >> %t.py +; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command="%python" -opt-args %t.py | FileCheck %s +; RUN: not --crash opt -load %llvmshlibdir/BugpointPasses%shlibext %s -bugpoint-crashcalls -disable-symbolication 2>&1 | FileCheck --check-prefix=CRASH %s + +; Test that bugpoint disables symbolication on the opt tool to reduce runtime overhead when opt crashes +; CHECK: args = {{.*}}'-disable-symbolication' + +; Test that opt, when it crashes & is passed -disable-symbolication, doesn't symbolicate. +; In theory this test should maybe be in test/tools/opt or +; test/Transforms, but since there doesn't seem to be another convenient way to +; crash opt, apart from the BugpointPasses dynamic plugin, this is the spot for +; now. +; CRASH-NOT: Signals.inc + +define void @f() { + call void @f() + ret void +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 25c340fea6f7..b52b6018e026 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -61,7 +61,7 @@ set(LLVM_TEST_DEPENDS llvm-nm llvm-objdump llvm-opt-report - llvm-pdbdump + llvm-pdbutil llvm-profdata llvm-ranlib llvm-readobj diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll index 006308641184..cd3ea9715e0f 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll @@ -4,7 +4,7 @@ ; CHECK: name: test_stack_guard ; CHECK: stack: -; CHECK: - { id: 0, name: StackGuardSlot, offset: 0, size: 8, alignment: 8 } +; CHECK: - { id: 0, name: StackGuardSlot, type: default, offset: 0, size: 8, alignment: 8, ; CHECK-NOT: id: 1 ; CHECK: [[GUARD_SLOT:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.StackGuardSlot diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index ac3d4b17f739..65b8ba570701 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -31,10 +31,13 @@ define i64 @muli64(i64 %arg1, i64 %arg2) { ; Tests for alloca ; CHECK-LABEL: name: allocai64 ; CHECK: stack: -; CHECK-NEXT: - { id: 0, name: ptr1, offset: 0, size: 8, alignment: 8 } -; CHECK-NEXT: - { id: 1, name: ptr2, offset: 0, size: 8, alignment: 1 } -; CHECK-NEXT: - { id: 2, name: ptr3, offset: 0, size: 128, alignment: 8 } -; CHECK-NEXT: - { id: 3, name: ptr4, offset: 0, size: 1, alignment: 8 } +; CHECK-NEXT: - { id: 0, name: ptr1, type: default, offset: 0, size: 8, alignment: 8, +; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: - { id: 1, name: ptr2, type: default, offset: 0, size: 8, alignment: 1, +; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: - { id: 2, name: ptr3, type: default, offset: 0, size: 128, alignment: 8, +; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: - { id: 3, name: ptr4, type: default, offset: 0, size: 1, alignment: 8, ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.0.ptr1 ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.1.ptr2 ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.2.ptr3 @@ -1550,3 +1553,15 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2) define <4 x half> @test_constant_vector() { ret <4 x half> } + +define i32 @test_target_mem_intrinsic(i32* %addr) { +; CHECK-LABEL: name: test_target_mem_intrinsic +; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]](s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr) +; CHECK: G_TRUNC [[VAL]](s64) + %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) + %trunc = trunc i64 %val to i32 + ret i32 %trunc +} + +declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir index 0f054f1d940c..296f65c041a1 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir +++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir @@ -98,8 +98,8 @@ name: defaultMapping legalized: true # CHECK-LABEL: name: defaultMapping # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -119,8 +119,8 @@ name: defaultMappingVector legalized: true # CHECK-LABEL: name: defaultMappingVector # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -141,10 +141,10 @@ name: defaultMapping1Repair legalized: true # CHECK-LABEL: name: defaultMapping1Repair # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -166,10 +166,10 @@ name: defaultMapping2Repairs legalized: true # CHECK-LABEL: name: defaultMapping2Repairs # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -193,9 +193,9 @@ name: defaultMappingDefRepair legalized: true # CHECK-LABEL: name: defaultMappingDefRepair # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: fpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: fpr } @@ -215,11 +215,11 @@ name: phiPropagation legalized: true tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64sp } -# CHECK-NEXT: - { id: 2, class: gpr32 } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr32 } - { id: 1, class: gpr64sp } @@ -254,10 +254,10 @@ name: defaultMappingUseRepairPhysReg legalized: true # CHECK-LABEL: name: defaultMappingUseRepairPhysReg # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: fpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -280,8 +280,8 @@ name: defaultMappingDefRepairPhysReg legalized: true # CHECK-LABEL: name: defaultMappingDefRepairPhysReg # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -303,18 +303,18 @@ name: greedyMappingOr legalized: true # CHECK-LABEL: name: greedyMappingOr # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } # Fast mode maps vector instruction on FPR. -# FAST-NEXT: - { id: 2, class: fpr } +# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' } # Fast mode needs two extra copies. -# FAST-NEXT: - { id: 3, class: fpr } -# FAST-NEXT: - { id: 4, class: fpr } +# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } # Greedy mode coalesce the computation on the GPR register # because it is the cheapest. -# GREEDY-NEXT: - { id: 2, class: gpr } +# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -350,18 +350,18 @@ name: greedyMappingOrWithConstraints legalized: true # CHECK-LABEL: name: greedyMappingOrWithConstraints # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: fpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } # Fast mode maps vector instruction on FPR. # Fast mode needs two extra copies. -# FAST-NEXT: - { id: 3, class: fpr } -# FAST-NEXT: - { id: 4, class: fpr } +# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } # Greedy mode coalesce the computation on the GPR register because it # is the cheapest, but will need one extra copy to materialize %2 into a FPR. -# GREEDY-NEXT: - { id: 3, class: gpr } +# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -396,8 +396,8 @@ body: | name: ignoreTargetSpecificInst legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr64 } - { id: 1, class: gpr64 } @@ -434,8 +434,8 @@ name: bitcast_s32_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -457,8 +457,8 @@ name: bitcast_s32_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -480,9 +480,9 @@ name: bitcast_s32_gpr_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# FAST-NEXT: - { id: 1, class: fpr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -504,9 +504,9 @@ name: bitcast_s32_fpr_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# FAST-NEXT: - { id: 1, class: gpr } -# GREEDY-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -528,8 +528,8 @@ name: bitcast_s64_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -551,8 +551,8 @@ name: bitcast_s64_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -574,9 +574,9 @@ name: bitcast_s64_gpr_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# FAST-NEXT: - { id: 1, class: fpr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -597,9 +597,9 @@ name: bitcast_s64_fpr_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# FAST-NEXT: - { id: 1, class: gpr } -# GREEDY-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -624,15 +624,15 @@ name: greedyWithChainOfComputation legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# FAST-NEXT: - { id: 2, class: fpr } -# FAST-NEXT: - { id: 3, class: fpr } -# FAST-NEXT: - { id: 4, class: fpr } -# GREEDY-NEXT: - { id: 2, class: gpr } -# GREEDY-NEXT: - { id: 3, class: gpr } -# GREEDY-NEXT: - { id: 4, class: gpr } -# CHECK-NEXT: - { id: 5, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -674,11 +674,11 @@ name: floatingPointLoad legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: fpr } -# CHECK-NEXT: - { id: 3, class: fpr } -# CHECK-NEXT: - { id: 4, class: fpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -716,11 +716,11 @@ name: floatingPointStore legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: fpr } -# CHECK-NEXT: - { id: 3, class: fpr } -# CHECK-NEXT: - { id: 4, class: fpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll index 4e6b9cad4c3d..38a90bbfbbd9 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -3,8 +3,8 @@ ; CHECK-LABEL: name: test_stack_slots ; CHECK: fixedStack: -; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 1 -; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 1, size: 1 +; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, +; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 1, size: 1, ; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; CHECK: [[LHS:%[0-9]+]](s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) ; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll index 44705a9c9f65..e923a0b2847f 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -35,7 +35,7 @@ define void @test_simple_arg(i32 %in) { ; CHECK-LABEL: name: test_indirect_call ; CHECK: registers: ; Make sure the register feeding the indirect call is properly constrained. -; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64 } +; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64, preferred-register: '' } ; CHECK: %[[FUNC]](p0) = COPY %x0 ; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def %lr, implicit %sp ; CHECK: RET_ReallyLR @@ -165,9 +165,9 @@ define zeroext i8 @test_abi_zext_ret(i8* %addr) { ; CHECK-LABEL: name: test_stack_slots ; CHECK: fixedStack: -; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 8 -; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 8, size: 8 -; CHECK-DAG: - { id: [[STACK16:[0-9]+]], offset: 16, size: 8 +; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, +; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, +; CHECK-DAG: - { id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 8, ; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; CHECK: [[LHS:%[0-9]+]](s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) ; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] @@ -208,7 +208,7 @@ define void @test_call_stack() { ; CHECK-LABEL: name: test_mem_i1 ; CHECK: fixedStack: -; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false } +; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true, ; CHECK: [[ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]] ; CHECK: {{%[0-9]+}}(s1) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 0) define void @test_mem_i1([8 x i64], i1 %in) { diff --git a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll index e01bd2a9f7c8..e832ba953241 100644 --- a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll +++ b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll @@ -3,8 +3,8 @@ ; CHECK-LABEL: name: debug_declare ; CHECK: stack: -; CHECK: - { id: {{.*}}, name: in.addr, offset: {{.*}}, size: {{.*}}, alignment: {{.*}}, di-variable: '!11', -; CHECK-NEXT: di-expression: '!12', di-location: '!13' } +; CHECK: - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}}, +; CHECK-NEXT: callee-saved-register: '', di-variable: '!11', di-expression: '!12', ; CHECK: DBG_VALUE debug-use %0(s32), debug-use _, !11, !12, debug-location !13 define void @debug_declare(i32 %in) #0 !dbg !7 { entry: diff --git a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir index ea8a77ca3917..28c926b5d062 100644 --- a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir +++ b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir @@ -35,15 +35,15 @@ regBankSelected: true tracksRegLiveness: true registers: # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: fpr } -# CHECK-NEXT: - { id: 3, class: fpr } -# CHECK-NEXT: - { id: 4, class: fpr } -# CHECK-NEXT: - { id: 5, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' } # The localizer will create two new values to materialize the constants. -# OPTNONE-NEXT: - { id: 6, class: fpr } -# OPTNONE-NEXT: - { id: 7, class: fpr } +# OPTNONE-NEXT: - { id: 6, class: fpr, preferred-register: '' } +# OPTNONE-NEXT: - { id: 7, class: fpr, preferred-register: '' } - { id: 0, class: fpr } - { id: 1, class: gpr } - { id: 2, class: fpr } diff --git a/test/CodeGen/AArch64/GlobalISel/localizer.mir b/test/CodeGen/AArch64/GlobalISel/localizer.mir index 5bf8dac79860..afe2c13f025d 100644 --- a/test/CodeGen/AArch64/GlobalISel/localizer.mir +++ b/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -44,11 +44,11 @@ regBankSelected: true # CHECK: registers: # Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr } -#CHECK-NEXT: - { id: 1, class: gpr } -#CHECK-NEXT: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } # The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 3, class: gpr } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } @@ -82,11 +82,11 @@ regBankSelected: true # CHECK: registers: # Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr } -#CHECK-NEXT: - { id: 1, class: gpr } -#CHECK-NEXT: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } # The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 3, class: gpr } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } @@ -120,13 +120,13 @@ tracksRegLiveness: true # CHECK: registers: # Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr } -#CHECK-NEXT: - { id: 1, class: gpr } -#CHECK-NEXT: - { id: 2, class: gpr } -#CHECK-NEXT: - { id: 3, class: gpr } -#CHECK-NEXT: - { id: 4, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } # The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 5, class: gpr } +#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } @@ -168,14 +168,14 @@ tracksRegLiveness: true # CHECK: registers: # Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr } -#CHECK-NEXT: - { id: 1, class: gpr } -#CHECK-NEXT: - { id: 2, class: gpr } -#CHECK-NEXT: - { id: 3, class: gpr } -#CHECK-NEXT: - { id: 4, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } # The newly created regs should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 5, class: gpr } -#CHECK-NEXT: - { id: 6, class: gpr } +#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } @@ -219,14 +219,14 @@ tracksRegLiveness: true # CHECK: registers: # Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr } -#CHECK-NEXT: - { id: 1, class: gpr } -#CHECK-NEXT: - { id: 2, class: gpr } -#CHECK-NEXT: - { id: 3, class: gpr } -#CHECK-NEXT: - { id: 4, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } # The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 5, class: gpr } -#CHECK-NEXT: - { id: 6, class: gpr } +#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } @@ -270,14 +270,14 @@ tracksRegLiveness: true # CHECK: registers: # Existing registers should be left untouched -# CHECK: - { id: 0, class: fpr } -#CHECK-NEXT: - { id: 1, class: fpr } -#CHECK-NEXT: - { id: 2, class: fpr } -#CHECK-NEXT: - { id: 3, class: fpr } -#CHECK-NEXT: - { id: 4, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } # The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 5, class: fpr } -#CHECK-NEXT: - { id: 6, class: fpr } +#CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 6, class: fpr, preferred-register: '' } registers: - { id: 0, class: fpr } @@ -323,12 +323,12 @@ tracksRegLiveness: true # CHECK: registers: # Existing registers should be left untouched -# CHECK: - { id: 0, class: fpr } -#CHECK-NEXT: - { id: 1, class: fpr } -#CHECK-NEXT: - { id: 2, class: fpr } -#CHECK-NEXT: - { id: 3, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } # The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 4, class: fpr } +#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } registers: - { id: 0, class: fpr } diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir index 73d4d2054729..c8a8266e8b28 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir @@ -32,7 +32,7 @@ name: test_dbg_value legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } body: | bb.0: liveins: %w0 diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir index 14ee40c941bf..b8468d8cf55f 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir @@ -73,8 +73,8 @@ name: test_add_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -92,8 +92,8 @@ body: | name: test_add_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -111,8 +111,8 @@ body: | name: test_sub_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -130,8 +130,8 @@ body: | name: test_sub_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -149,8 +149,8 @@ body: | name: test_mul_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -168,8 +168,8 @@ body: | name: test_mul_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -187,8 +187,8 @@ body: | name: test_and_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -206,8 +206,8 @@ body: | name: test_and_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -225,8 +225,8 @@ body: | name: test_or_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -244,8 +244,8 @@ body: | name: test_or_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -263,8 +263,8 @@ body: | name: test_xor_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -282,8 +282,8 @@ body: | name: test_xor_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -301,8 +301,8 @@ body: | name: test_shl_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -320,8 +320,8 @@ body: | name: test_shl_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -339,8 +339,8 @@ body: | name: test_lshr_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -358,8 +358,8 @@ body: | name: test_ashr_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -377,8 +377,8 @@ body: | name: test_sdiv_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -396,8 +396,8 @@ body: | name: test_udiv_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -415,8 +415,8 @@ body: | name: test_anyext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -434,8 +434,8 @@ body: | name: test_sext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -453,8 +453,8 @@ body: | name: test_zext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -472,8 +472,8 @@ body: | name: test_trunc_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -491,7 +491,7 @@ body: | name: test_constant_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -505,7 +505,7 @@ body: | name: test_constant_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -519,8 +519,8 @@ body: | name: test_icmp_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -538,8 +538,8 @@ body: | name: test_icmp_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -557,7 +557,7 @@ body: | name: test_frame_index_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } stack: @@ -573,8 +573,8 @@ body: | name: test_ptrtoint_s64_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -592,8 +592,8 @@ body: | name: test_inttoptr_p0_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -611,8 +611,8 @@ body: | name: test_load_s32_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -630,8 +630,8 @@ body: | name: test_store_s32_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -651,8 +651,8 @@ body: | name: test_fadd_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -670,8 +670,8 @@ body: | name: test_fsub_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -689,8 +689,8 @@ body: | name: test_fmul_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -708,8 +708,8 @@ body: | name: test_fdiv_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -727,8 +727,8 @@ body: | name: test_fpext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -746,8 +746,8 @@ body: | name: test_fptrunc_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -765,7 +765,7 @@ body: | name: test_fconstant_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -779,8 +779,8 @@ body: | name: test_fcmp_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -798,8 +798,8 @@ body: | name: test_sitofp_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -817,8 +817,8 @@ body: | name: test_uitofp_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -836,8 +836,8 @@ body: | name: test_fptosi_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -855,8 +855,8 @@ body: | name: test_fptoui_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/AArch64/GlobalISel/select-binop.mir b/test/CodeGen/AArch64/GlobalISel/select-binop.mir index 8ae2e1b2eb7d..70cda516d5f1 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-binop.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-binop.mir @@ -64,9 +64,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -94,9 +94,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -123,9 +123,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32sp } +# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -151,9 +151,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr64sp } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -179,9 +179,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32sp } +# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -213,9 +213,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -243,9 +243,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -273,9 +273,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -303,9 +303,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -333,9 +333,9 @@ legalized: true regBankSelected: true # # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -365,9 +365,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -395,9 +395,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -425,9 +425,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -455,9 +455,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -485,9 +485,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -515,9 +515,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -545,9 +545,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -575,9 +575,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -606,9 +606,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -636,9 +636,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -666,10 +666,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } -# CHECK-NEXT: - { id: 3, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' } # CHECK: body: # CHECK: %0 = COPY %x0 @@ -696,9 +696,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -726,9 +726,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -756,9 +756,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -786,9 +786,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -816,9 +816,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -845,9 +845,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -874,9 +874,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -903,9 +903,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -932,9 +932,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -961,9 +961,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -990,9 +990,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -1019,9 +1019,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir index 5ca63dbc214d..d871a80661a8 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir @@ -19,8 +19,8 @@ name: bitcast_s32_gpr legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: gpr32all } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -44,8 +44,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -69,8 +69,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -94,8 +94,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32all } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -119,8 +119,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64all } -# CHECK-NEXT: - { id: 1, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -144,8 +144,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -169,8 +169,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64all } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -193,8 +193,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir index fbb11a1c7a4c..34c3da3a5369 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir @@ -34,8 +34,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr64 } -# CHECK: - { id: 1, class: fpr32 } +# CHECK: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -59,8 +59,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr32 } -# CHECK: - { id: 1, class: fpr64 } +# CHECK: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -84,8 +84,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -109,8 +109,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -134,8 +134,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -159,8 +159,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -184,8 +184,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -209,8 +209,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -234,8 +234,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -259,8 +259,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -284,8 +284,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -309,8 +309,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -334,8 +334,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -359,8 +359,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -384,8 +384,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -409,8 +409,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -434,8 +434,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -459,8 +459,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir index 2ba8b7366252..5f29f8b62fab 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir @@ -24,9 +24,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: gpr64all } -# CHECK-NEXT: - { id: 2, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -51,8 +51,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: gpr32all } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -76,9 +76,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -103,8 +103,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -128,8 +128,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -153,8 +153,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -178,9 +178,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -205,8 +205,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -230,8 +230,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -255,8 +255,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir index 6537408f6d98..b71a9a3d731e 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir @@ -18,8 +18,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64all } -# CHECK-NEXT: - { id: 1, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -41,8 +41,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -64,8 +64,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -87,8 +87,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -110,8 +110,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -133,8 +133,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-load.mir b/test/CodeGen/AArch64/GlobalISel/select-load.mir index 9188e2b0c0fc..d00b98d148be 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-load.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-load.mir @@ -37,8 +37,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -62,8 +62,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -87,8 +87,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -112,8 +112,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -137,8 +137,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -165,10 +165,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -197,10 +197,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -229,10 +229,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -261,10 +261,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -293,8 +293,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -318,8 +318,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -343,8 +343,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr16 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -368,8 +368,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr8 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -393,10 +393,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -425,10 +425,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -457,10 +457,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr16 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -489,10 +489,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr8 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir index 7d5b43bc16d5..cd7a79f17d95 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir @@ -13,13 +13,13 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } -# CHECK-NEXT: - { id: 5, class: gpr } -# CHECK-NEXT: - { id: 6, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-store.mir b/test/CodeGen/AArch64/GlobalISel/select-store.mir index 9b8f5c566ce0..536e236c2738 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-store.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-store.mir @@ -35,8 +35,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -62,8 +62,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -89,8 +89,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -116,8 +116,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -143,8 +143,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -169,8 +169,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -195,8 +195,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -223,10 +223,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -255,10 +255,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -287,10 +287,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -319,10 +319,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -351,8 +351,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -378,8 +378,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -405,10 +405,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -437,10 +437,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir index fc3546e777f7..5559e2d3a0d1 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir @@ -15,8 +15,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -39,8 +39,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -63,8 +63,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-xor.mir b/test/CodeGen/AArch64/GlobalISel/select-xor.mir index e787849c8d1b..7190fda15b8e 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-xor.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-xor.mir @@ -20,9 +20,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -50,9 +50,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -81,9 +81,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -110,9 +110,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -139,9 +139,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select.mir b/test/CodeGen/AArch64/GlobalISel/select.mir index 8bffa085fdca..5e52bc761a84 100644 --- a/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/test/CodeGen/AArch64/GlobalISel/select.mir @@ -35,7 +35,7 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } registers: - { id: 0, class: gpr } @@ -132,12 +132,12 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr64 } -# CHECK-NEXT: - { id: 3, class: gpr32 } -# CHECK-NEXT: - { id: 4, class: gpr64 } -# CHECK-NEXT: - { id: 5, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -180,12 +180,12 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: fpr64 } -# CHECK-NEXT: - { id: 3, class: gpr32 } -# CHECK-NEXT: - { id: 4, class: gpr32 } -# CHECK-NEXT: - { id: 5, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -223,9 +223,9 @@ regBankSelected: true tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -260,16 +260,16 @@ regBankSelected: true tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } -# CHECK-NEXT: - { id: 3, class: gpr32 } -# CHECK-NEXT: - { id: 4, class: gpr64 } -# CHECK-NEXT: - { id: 5, class: gpr64 } -# CHECK-NEXT: - { id: 6, class: gpr64 } -# CHECK-NEXT: - { id: 7, class: gpr64 } -# CHECK-NEXT: - { id: 8, class: gpr64 } -# CHECK-NEXT: - { id: 9, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 7, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 8, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 9, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll index 3bd56fa4cebc..af0ab57b0b9f 100644 --- a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll @@ -4,7 +4,7 @@ define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64, i32, ...) { ; CHECK-LABEL: name: test_varargs_sentinel ; CHECK: fixedStack: -; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], offset: 8 +; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], type: default, offset: 8 ; CHECK: body: ; CHECK: [[LIST:%[0-9]+]] = COPY %x0 ; CHECK: [[VARARGS_AREA:%[0-9]+]] = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll new file mode 100644 index 000000000000..16a02de79a91 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -0,0 +1,131 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s + +; Test fptosi +define i32 @fptosi_wh(half %a) nounwind ssp { +entry: +; CHECK-LABEL: fptosi_wh +; CHECK: fcvt s1, h0 +; CHECK: fcvtzs [[REG:w[0-9]+]], s1 +; CHECK: mov w0, [[REG]] + %conv = fptosi half %a to i32 + ret i32 %conv +} + +; Test fptoui +define i32 @fptoui_swh(half %a) nounwind ssp { +entry: +; CHECK-LABEL: fptoui_swh +; CHECK: fcvt s1, h0 +; CHECK: fcvtzu [[REG:w[0-9]+]], s1 +; CHECK: mov w0, [[REG]] + %conv = fptoui half %a to i32 + ret i32 %conv +} + +; Test sitofp +define half @sitofp_hw_i1(i1 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i1 +; CHECK: sbfx w0, w0, #0, #1 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i1 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hw_i8(i8 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i8 +; CHECK: sxtb w0, w0 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i8 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hw_i16(i16 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i16 +; CHECK: sxth w0, w0 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i16 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hw_i32(i32 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i32 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i32 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hx(i64 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hx +; CHECK: scvtf s0, x0 +; CHECK: fcvt h0, s0 + %conv = sitofp i64 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i1(i1 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i1 +; CHECK: and w0, w0, #0x1 +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i1 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i8(i8 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i8 +; CHECK: and w0, w0, #0xff +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i8 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i16(i16 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i16 +; CHECK: and w0, w0, #0xffff +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i16 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i32(i32 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i32 +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i32 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hx(i64 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hx +; CHECK: ucvtf s0, x0 +; CHECK: fcvt h0, s0 + %conv = uitofp i64 %a to half + ret half %conv +} + + diff --git a/test/CodeGen/AArch64/spill-undef.mir b/test/CodeGen/AArch64/spill-undef.mir new file mode 100644 index 000000000000..4294df286bd3 --- /dev/null +++ b/test/CodeGen/AArch64/spill-undef.mir @@ -0,0 +1,67 @@ +# RUN: llc %s -run-pass greedy -o - | FileCheck %s +# Check that we don't insert spill code for undef values. +# Uninitialized memory for them is fine. +# PR33311 +--- | + ; ModuleID = 'stuff.ll' + target triple = "aarch64--" + + @g = external global i32 + + define void @foobar() { + ret void + } + +... +--- +name: foobar +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32all } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr64common } + - { id: 5, class: gpr32 } + - { id: 6, class: gpr64common } + - { id: 7, class: gpr32 } + - { id: 8, class: gpr32 } + - { id: 9, class: gpr64 } +body: | + bb.0: + liveins: %x0 + successors: %bb.1, %bb.2 + + ; %8 is going to be spilled. + ; But on that path, we don't care about its value. + ; Emit a simple KILL instruction instead of an + ; actual spill. + ; CHECK: [[UNDEF:%[0-9]+]] = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[UNDEF]] + %8 = IMPLICIT_DEF + ; %9 us going to be spilled. + ; But it is only partially undef. + ; Make sure we spill it properly + ; CHECK: [[NINE:%[0-9]+]] = COPY %x0 + ; CHECK: [[NINE]].sub_32 = IMPLICIT_DEF + ; CHECK-NEXT: STRXui [[NINE]] + %9 = COPY %x0 + %9.sub_32 = IMPLICIT_DEF + CBNZW %wzr, %bb.2 + B %bb.1 + + bb.1: + %4 = ADRP target-flags(aarch64-page) @g + %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g) + INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + + bb.2: + INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + %6 = ADRP target-flags(aarch64-page) @g + %w0 = MOVi32imm 42 + STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 4 into @g) + STRXui %9, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 8 into @g) + RET_ReallyLR implicit killed %w0 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir new file mode 100644 index 000000000000..ebd473d769b3 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -0,0 +1,24 @@ +# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- | + define void @test_icmp() { + entry: + ret void + } +... + +--- +name: test_icmp +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0.entry: + liveins: %vgpr0 + %0(s32) = G_CONSTANT i32 0 + %1(s32) = COPY %vgpr0 + + ; CHECK: %2(s1) = G_ICMP intpred(ne), %0(s32), %1 + %2(s1) = G_ICMP intpred(ne), %0, %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir new file mode 100644 index 000000000000..d11130936bd9 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -0,0 +1,28 @@ +# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- | + define void @test_select() { ret void } +... + +--- +name: test_select +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + liveins: %vgpr0 + %0(s32) = G_CONSTANT i32 0 + %1(s32) = COPY %vgpr0 + + %2(s1) = G_ICMP intpred(ne), %0, %1 + %3(s32) = G_CONSTANT i32 1 + %4(s32) = G_CONSTANT i32 2 + ; CHECK: %5(s32) = G_SELECT %2(s1), %3, %4 + %5(s32) = G_SELECT %2, %3, %4 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index 3496b1ab71fe..902f1e6c6725 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -24,8 +24,8 @@ legalized: true # CHECK-LABEL: name: load_constant # CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: sgpr } +# CHECK: - { id: 0, class: sgpr, preferred-register: '' } +# CHECK: - { id: 1, class: sgpr, preferred-register: '' } body: | bb.0: @@ -40,8 +40,8 @@ legalized: true # CHECK-LABEL: name: load_global_uniform # CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: sgpr } +# CHECK: - { id: 0, class: sgpr, preferred-register: '' } +# CHECK: - { id: 1, class: sgpr, preferred-register: '' } body: | bb.0: @@ -56,9 +56,9 @@ legalized: true # CHECK-LABEL: name: load_global_non_uniform # CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: vgpr } -# CHECK: - { id: 2, class: vgpr } +# CHECK: - { id: 0, class: sgpr, preferred-register: '' } +# CHECK: - { id: 1, class: vgpr, preferred-register: '' } +# CHECK: - { id: 2, class: vgpr, preferred-register: '' } body: | diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll index e5e2d436deb0..76f724c2b90b 100644 --- a/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/test/CodeGen/AMDGPU/add.v2i16.ll @@ -66,7 +66,7 @@ define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}} ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8 -; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid @@ -84,7 +84,7 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}} ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21 -; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid @@ -101,7 +101,7 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* ; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]] ; VI: flat_load_ushort [[LOAD1:v[0-9]+]] -; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]] ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { @@ -140,7 +140,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspac ; VI-NOT: v_add_u16 ; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80 -; VI: v_add_u16_sdwa v{{[0-9]+}}, v[[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_add_u16 ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll index 7f424ef2a147..dd96e6264418 100644 --- a/test/CodeGen/AMDGPU/ashr.v2i16.ll +++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -9,7 +9,7 @@ ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[VLHS]] ; VI: v_ashrrev_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_ashrrev_i32_e32 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/branch-relax-spill.ll b/test/CodeGen/AMDGPU/branch-relax-spill.ll index ede15559c4ff..db476c21636f 100644 --- a/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -7,110 +7,110 @@ define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 { entry: - %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={SGPR0}"() #0 - %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={SGPR1}"() #0 - %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={SGPR2}"() #0 - %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={SGPR3}"() #0 - %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={SGPR4}"() #0 - %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={SGPR5}"() #0 - %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={SGPR6}"() #0 - %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={SGPR7}"() #0 - %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={SGPR8}"() #0 - %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={SGPR9}"() #0 - %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={SGPR10}"() #0 - %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={SGPR11}"() #0 - %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={SGPR12}"() #0 - %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={SGPR13}"() #0 - %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={SGPR14}"() #0 - %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={SGPR15}"() #0 - %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={SGPR16}"() #0 - %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={SGPR17}"() #0 - %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={SGPR18}"() #0 - %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={SGPR19}"() #0 - %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={SGPR20}"() #0 - %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={SGPR21}"() #0 - %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={SGPR22}"() #0 - %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={SGPR23}"() #0 - %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={SGPR24}"() #0 - %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={SGPR25}"() #0 - %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={SGPR26}"() #0 - %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={SGPR27}"() #0 - %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={SGPR28}"() #0 - %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={SGPR29}"() #0 - %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={SGPR30}"() #0 - %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={SGPR31}"() #0 - %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={SGPR32}"() #0 - %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={SGPR33}"() #0 - %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={SGPR34}"() #0 - %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={SGPR35}"() #0 - %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={SGPR36}"() #0 - %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={SGPR37}"() #0 - %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={SGPR38}"() #0 - %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={SGPR39}"() #0 - %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={SGPR40}"() #0 - %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={SGPR41}"() #0 - %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={SGPR42}"() #0 - %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={SGPR43}"() #0 - %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={SGPR44}"() #0 - %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={SGPR45}"() #0 - %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={SGPR46}"() #0 - %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={SGPR47}"() #0 - %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={SGPR48}"() #0 - %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={SGPR49}"() #0 - %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={SGPR50}"() #0 - %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={SGPR51}"() #0 - %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={SGPR52}"() #0 - %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={SGPR53}"() #0 - %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={SGPR54}"() #0 - %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={SGPR55}"() #0 - %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={SGPR56}"() #0 - %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={SGPR57}"() #0 - %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={SGPR58}"() #0 - %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={SGPR59}"() #0 - %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={SGPR60}"() #0 - %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={SGPR61}"() #0 - %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={SGPR62}"() #0 - %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={SGPR63}"() #0 - %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={SGPR64}"() #0 - %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={SGPR65}"() #0 - %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={SGPR66}"() #0 - %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={SGPR67}"() #0 - %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={SGPR68}"() #0 - %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={SGPR69}"() #0 - %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={SGPR70}"() #0 - %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={SGPR71}"() #0 - %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={SGPR72}"() #0 - %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={SGPR73}"() #0 - %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={SGPR74}"() #0 - %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={SGPR75}"() #0 - %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={SGPR76}"() #0 - %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={SGPR77}"() #0 - %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={SGPR78}"() #0 - %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={SGPR79}"() #0 - %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={SGPR80}"() #0 - %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={SGPR81}"() #0 - %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={SGPR82}"() #0 - %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={SGPR83}"() #0 - %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={SGPR84}"() #0 - %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={SGPR85}"() #0 - %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={SGPR86}"() #0 - %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={SGPR87}"() #0 - %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={SGPR88}"() #0 - %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={SGPR89}"() #0 - %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={SGPR90}"() #0 - %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={SGPR91}"() #0 - %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={SGPR92}"() #0 - %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={SGPR93}"() #0 - %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={SGPR94}"() #0 - %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={SGPR95}"() #0 - %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={SGPR96}"() #0 - %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={SGPR97}"() #0 - %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={SGPR98}"() #0 - %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={SGPR99}"() #0 - %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={SGPR100}"() #0 - %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={SGPR101}"() #0 - %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={SGPR102}"() #0 - %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={SGPR103}"() #0 + %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 + %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0 + %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0 + %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0 + %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0 + %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0 + %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0 + %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0 + %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0 + %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0 + %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0 + %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0 + %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0 + %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0 + %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0 + %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0 + %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0 + %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0 + %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0 + %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0 + %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0 + %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0 + %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0 + %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0 + %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0 + %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0 + %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0 + %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0 + %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0 + %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0 + %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0 + %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0 + %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0 + %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0 + %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0 + %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0 + %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0 + %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0 + %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0 + %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0 + %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0 + %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0 + %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0 + %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0 + %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0 + %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0 + %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0 + %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0 + %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0 + %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0 + %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0 + %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0 + %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0 + %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0 + %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0 + %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0 + %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0 + %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0 + %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0 + %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0 + %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0 + %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0 + %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0 + %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0 + %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0 + %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0 + %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0 + %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0 + %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0 + %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0 + %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0 + %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0 + %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0 + %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0 + %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0 + %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0 + %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0 + %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0 + %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0 + %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0 + %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0 + %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0 + %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0 + %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0 + %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0 + %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0 + %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0 + %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0 + %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0 + %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0 + %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0 + %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0 + %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0 + %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0 + %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0 + %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0 + %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0 + %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0 + %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0 + %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0 + %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0 + %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0 + %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={s102}"() #0 + %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={s103}"() #0 %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_LO}"() #0 %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_HI}"() #0 %cmp = icmp eq i32 %cnd, 0 @@ -126,112 +126,112 @@ bb2: ; 28 bytes br label %bb3 bb3: - tail call void asm sideeffect "; reg use $0", "{SGPR0}"(i32 %sgpr0) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR1}"(i32 %sgpr1) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR2}"(i32 %sgpr2) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR3}"(i32 %sgpr3) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR4}"(i32 %sgpr4) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR5}"(i32 %sgpr5) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR6}"(i32 %sgpr6) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR7}"(i32 %sgpr7) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR8}"(i32 %sgpr8) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR9}"(i32 %sgpr9) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR10}"(i32 %sgpr10) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR11}"(i32 %sgpr11) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR12}"(i32 %sgpr12) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR13}"(i32 %sgpr13) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR14}"(i32 %sgpr14) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR15}"(i32 %sgpr15) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR16}"(i32 %sgpr16) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR17}"(i32 %sgpr17) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR18}"(i32 %sgpr18) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR19}"(i32 %sgpr19) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR20}"(i32 %sgpr20) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR21}"(i32 %sgpr21) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR22}"(i32 %sgpr22) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR23}"(i32 %sgpr23) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR24}"(i32 %sgpr24) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR25}"(i32 %sgpr25) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR26}"(i32 %sgpr26) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR27}"(i32 %sgpr27) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR28}"(i32 %sgpr28) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR29}"(i32 %sgpr29) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR30}"(i32 %sgpr30) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR31}"(i32 %sgpr31) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR32}"(i32 %sgpr32) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR33}"(i32 %sgpr33) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR34}"(i32 %sgpr34) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR35}"(i32 %sgpr35) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR36}"(i32 %sgpr36) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR37}"(i32 %sgpr37) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR38}"(i32 %sgpr38) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR39}"(i32 %sgpr39) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR40}"(i32 %sgpr40) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR41}"(i32 %sgpr41) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR42}"(i32 %sgpr42) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR43}"(i32 %sgpr43) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR44}"(i32 %sgpr44) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR45}"(i32 %sgpr45) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR46}"(i32 %sgpr46) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR47}"(i32 %sgpr47) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR48}"(i32 %sgpr48) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR49}"(i32 %sgpr49) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR50}"(i32 %sgpr50) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR51}"(i32 %sgpr51) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR52}"(i32 %sgpr52) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR53}"(i32 %sgpr53) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR54}"(i32 %sgpr54) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR55}"(i32 %sgpr55) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR56}"(i32 %sgpr56) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR57}"(i32 %sgpr57) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR58}"(i32 %sgpr58) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR59}"(i32 %sgpr59) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR60}"(i32 %sgpr60) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR61}"(i32 %sgpr61) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR62}"(i32 %sgpr62) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR63}"(i32 %sgpr63) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR64}"(i32 %sgpr64) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR65}"(i32 %sgpr65) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR66}"(i32 %sgpr66) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR67}"(i32 %sgpr67) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR68}"(i32 %sgpr68) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR69}"(i32 %sgpr69) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR70}"(i32 %sgpr70) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR71}"(i32 %sgpr71) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR72}"(i32 %sgpr72) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR73}"(i32 %sgpr73) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR74}"(i32 %sgpr74) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR75}"(i32 %sgpr75) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR76}"(i32 %sgpr76) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR77}"(i32 %sgpr77) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR78}"(i32 %sgpr78) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR79}"(i32 %sgpr79) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR80}"(i32 %sgpr80) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR81}"(i32 %sgpr81) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR82}"(i32 %sgpr82) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR83}"(i32 %sgpr83) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR84}"(i32 %sgpr84) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR85}"(i32 %sgpr85) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR86}"(i32 %sgpr86) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR87}"(i32 %sgpr87) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR88}"(i32 %sgpr88) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR89}"(i32 %sgpr89) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR90}"(i32 %sgpr90) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR91}"(i32 %sgpr91) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR92}"(i32 %sgpr92) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR93}"(i32 %sgpr93) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR94}"(i32 %sgpr94) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR95}"(i32 %sgpr95) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR96}"(i32 %sgpr96) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR97}"(i32 %sgpr97) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR98}"(i32 %sgpr98) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR99}"(i32 %sgpr99) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR100}"(i32 %sgpr100) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR101}"(i32 %sgpr101) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR102}"(i32 %sgpr102) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR103}"(i32 %sgpr103) #0 - tail call void asm sideeffect "; reg use $0", "{VCC_LO}"(i32 %vcc_lo) #0 - tail call void asm sideeffect "; reg use $0", "{VCC_HI}"(i32 %vcc_hi) #0 + tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0 + tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0 + tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0 + tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0 + tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0 + tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0 + tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0 + tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0 + tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0 + tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0 + tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0 + tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0 + tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0 + tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0 + tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0 + tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0 + tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0 + tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0 + tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0 + tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0 + tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0 + tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0 + tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0 + tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0 + tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0 + tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0 + tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0 + tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0 + tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0 + tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0 + tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0 + tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0 + tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0 + tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0 + tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0 + tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0 + tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0 + tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0 + tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0 + tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0 + tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0 + tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0 + tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0 + tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0 + tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0 + tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0 + tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0 + tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0 + tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0 + tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0 + tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0 + tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0 + tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0 + tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0 + tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0 + tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0 + tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0 + tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0 + tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0 + tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0 + tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0 + tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0 + tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0 + tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0 + tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0 + tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0 + tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0 + tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0 + tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0 + tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0 + tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0 + tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0 + tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0 + tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0 + tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0 + tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0 + tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0 + tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0 + tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0 + tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0 + tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0 + tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0 + tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0 + tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0 + tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0 + tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0 + tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0 + tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0 + tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0 + tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0 + tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0 + tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0 + tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0 + tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0 + tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0 + tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0 + tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0 + tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0 + tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0 + tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0 + tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0 + tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0 + tail call void asm sideeffect "; reg use $0", "{s102}"(i32 %sgpr102) #0 + tail call void asm sideeffect "; reg use $0", "{s103}"(i32 %sgpr103) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0 ret void } diff --git a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir index fbfd0fbf9308..6ecf75c1acec 100644 --- a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir +++ b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir @@ -24,6 +24,10 @@ ret void } + define amdgpu_ps void @v_max_reg_imm_f32() #0 { + ret void + } + attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" } ... @@ -422,3 +426,19 @@ body: | S_ENDPGM ... +--- + +# Pass used to crash with immediate second operand of max +name: v_max_reg_imm_f32 +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } +body: | + bb.0 (%ir-block.0): + liveins: %vgpr0 + + %0 = COPY %vgpr0 + %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit %exec + +... diff --git a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll index 207dfce75f16..13aafc24895d 100644 --- a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll +++ b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll @@ -2,97 +2,97 @@ ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () ret void } ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () ret void } ; ERROR: error: scalar registers limit of 104 exceeded (108) in use_too_many_sgprs_bonaire_flat_scr define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () - call void asm sideeffect "", "~{FLAT_SCR}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () + call void asm sideeffect "", "~{flat_scratch}" () ret void } ; ERROR: error: scalar registers limit of 96 exceeded (98) in use_too_many_sgprs_iceland define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 { - call void asm sideeffect "", "~{VCC}" () - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () + call void asm sideeffect "", "~{vcc}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () ret void } ; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99}" () - call void asm sideeffect "", "~{SGPR100_SGPR101}" () - call void asm sideeffect "", "~{SGPR102}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:99]}" () + call void asm sideeffect "", "~{s[100:101]}" () + call void asm sideeffect "", "~{s102}" () ret void } diff --git a/test/CodeGen/AMDGPU/fabs.f16.ll b/test/CodeGen/AMDGPU/fabs.f16.ll index d4ef7124a334..4e2ec4b3054f 100644 --- a/test/CodeGen/AMDGPU/fabs.f16.ll +++ b/test/CodeGen/AMDGPU/fabs.f16.ll @@ -40,7 +40,7 @@ define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) { ; VI: flat_load_ushort [[LO:v[0-9]+]] ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}} ; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[HI]] -; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[MASK]], [[LO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[LO]], [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, [[FABS_HI]], [[FABS_LO]] ; VI: flat_store_dword @@ -60,8 +60,8 @@ define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half ; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]] ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}} -; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -128,7 +128,7 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i ; CI: v_cvt_f16_f32 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, -; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} +; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} ; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]] diff --git a/test/CodeGen/AMDGPU/fadd.f16.ll b/test/CodeGen/AMDGPU/fadd.f16.ll index 9b3d2a475a14..08199be144f4 100644 --- a/test/CodeGen/AMDGPU/fadd.f16.ll +++ b/test/CodeGen/AMDGPU/fadd.f16.ll @@ -78,7 +78,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_add_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] @@ -108,7 +108,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_mov_b32_e32 v[[CONST2:[0-9]+]], 0x4000 -; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]] ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] @@ -137,7 +137,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00 -; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[CONST1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[A_V2_F16]] ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index 9e8ddd39bbaf..404358f0ecb9 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -278,9 +278,9 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa } ; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16: -; VI: v_mul_f16_e64 [[REG0:v[0-9]+]], 1.0, {{s[0-9]+}} -; VI-DAG: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}} -; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, +; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00 +; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], [[ONE]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}} ; VI-NOT: v_and_b32 ; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}} diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll index 5705cbc99443..a7664c399fbb 100644 --- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -21,7 +21,7 @@ ; VI-XNACK: ; NumSgprs: 12 define amdgpu_kernel void @no_vcc_no_flat() { entry: - call void asm sideeffect "", "~{SGPR7}"() + call void asm sideeffect "", "~{s7}"() ret void } @@ -35,7 +35,7 @@ entry: ; VI-XNACK: ; NumSgprs: 12 define amdgpu_kernel void @vcc_no_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{VCC}"() + call void asm sideeffect "", "~{s7},~{vcc}"() ret void } @@ -52,7 +52,7 @@ entry: ; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @no_vcc_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() + call void asm sideeffect "", "~{s7},~{flat_scratch}"() ret void } @@ -68,7 +68,7 @@ entry: ; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @vcc_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"() + call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"() ret void } @@ -81,7 +81,7 @@ entry: ; VI-XNACK: NumSgprs: 6 define amdgpu_kernel void @use_flat_scr() #0 { entry: - call void asm sideeffect "; clobber ", "~{FLAT_SCR}"() + call void asm sideeffect "; clobber ", "~{flat_scratch}"() ret void } @@ -91,7 +91,7 @@ entry: ; VI-XNACK: NumSgprs: 6 define amdgpu_kernel void @use_flat_scr_lo() #0 { entry: - call void asm sideeffect "; clobber ", "~{FLAT_SCR_LO}"() + call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"() ret void } @@ -101,7 +101,7 @@ entry: ; VI-XNACK: NumSgprs: 6 define amdgpu_kernel void @use_flat_scr_hi() #0 { entry: - call void asm sideeffect "; clobber ", "~{FLAT_SCR_HI}"() + call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"() ret void } diff --git a/test/CodeGen/AMDGPU/fmul.f16.ll b/test/CodeGen/AMDGPU/fmul.f16.ll index 4ef2aa693cf4..cd86409e2038 100644 --- a/test/CodeGen/AMDGPU/fmul.f16.ll +++ b/test/CodeGen/AMDGPU/fmul.f16.ll @@ -78,7 +78,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_mul_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] @@ -105,7 +105,7 @@ entry: ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 v[[CONST4:[0-9]+]], 0x4400 -; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] @@ -131,7 +131,7 @@ entry: ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 v[[CONST3:[0-9]+]], 0x4200 -; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST3]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index c256159726bf..f4afaca2b7a7 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -73,7 +73,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa ; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} ; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]] ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], ; CIVI: flat_store_dword @@ -92,9 +92,9 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], ; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]] -; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], ; GFX9: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000 @@ -116,7 +116,7 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} ; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0 -; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0 +; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 neg_lo:[1,0] neg_hi:[1,0] diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll index 16e4fc680bea..59745a9352ce 100644 --- a/test/CodeGen/AMDGPU/fneg.f16.ll +++ b/test/CodeGen/AMDGPU/fneg.f16.ll @@ -117,7 +117,7 @@ define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i ; CI: v_cvt_f16_f32 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, -; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_mul_f16_sdwa v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}} diff --git a/test/CodeGen/AMDGPU/fptosi.f16.ll b/test/CodeGen/AMDGPU/fptosi.f16.ll index 50e56e08416a..f310618d8bdb 100644 --- a/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -66,7 +66,7 @@ entry: ; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] ; VI: v_cvt_i32_f32_sdwa v[[R_I16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GCN: buffer_store_dword v[[R_V2_I16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/fptoui.f16.ll b/test/CodeGen/AMDGPU/fptoui.f16.ll index 2afa6111cf17..7641c08e33c3 100644 --- a/test/CodeGen/AMDGPU/fptoui.f16.ll +++ b/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -66,7 +66,7 @@ entry: ; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] ; VI: v_cvt_i32_f32_sdwa v[[R_I16_0:[0-9]+]], v[[A_F32_0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GCN: buffer_store_dword v[[R_V2_I16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll index 836b480b6a67..fa00c06546db 100644 --- a/test/CodeGen/AMDGPU/fsub.f16.ll +++ b/test/CodeGen/AMDGPU/fsub.f16.ll @@ -78,7 +78,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_subrev_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1] @@ -146,7 +146,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00 -; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONSTM1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]] ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll index af63a4f8df76..81d9ed2eba8c 100644 --- a/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,6 +1,12 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s @@ -15,11 +21,16 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s ; HSA: .hsa_code_object_version 2,1 +; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU" +; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU" ; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" ; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU" ; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU" +; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU" ; HSA-VI800: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU" ; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" ; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU" @@ -28,3 +39,5 @@ ; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" ; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" ; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU" +; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" +; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU" diff --git a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll index 6e411ce5e017..0c5b8fbda222 100644 --- a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll +++ b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll @@ -5,40 +5,40 @@ ; GCN: ; illegal copy v1 to s9 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 { - %vgpr = call i32 asm sideeffect "; def $0", "=${VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR9}"(i32 %vgpr) + %vgpr = call i32 asm sideeffect "; def $0", "=${v1}"() + call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:1] to s[10:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 { - %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR10_SGPR11}"(<2 x i32> %vgpr) + %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"() + call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:3] to s[8:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 { - %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11}"(<4 x i32> %vgpr) + %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"() + call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:7] to s[8:15] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 { - %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}"(<8 x i32> %vgpr) + %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"() + call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr) ret void } ; ERR error: :0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:15] to s[16:31] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 { - %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}"() - call void asm sideeffect "; use $0", "${SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23_SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}"(<16 x i32> %vgpr) + %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"() + call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr) ret void } diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll index bc951a82becd..cd3502baee7b 100644 --- a/test/CodeGen/AMDGPU/immv216.ll +++ b/test/CodeGen/AMDGPU/immv216.ll @@ -124,7 +124,7 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST0]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -142,7 +142,7 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -160,7 +160,7 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -178,7 +178,7 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -196,7 +196,7 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -214,7 +214,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -232,7 +232,7 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -250,7 +250,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -268,7 +268,7 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -285,7 +285,7 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* ; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 ; VI: buffer_load_dword ; VI-NOT: and -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}} ; VI: v_or_b32 ; VI: buffer_store_dword @@ -306,7 +306,7 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace ; VI-DAG: buffer_load_dword ; VI-NOT: and ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}} -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: buffer_store_dword define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { @@ -325,7 +325,7 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -343,7 +343,7 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -361,7 +361,7 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -379,7 +379,7 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -397,7 +397,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* % ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -415,7 +415,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* % ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -433,7 +433,7 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST63]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -451,7 +451,7 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL0]] ; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64 -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST64]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index fab1f8d12253..0d20c32a4770 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -379,7 +379,7 @@ entry: %idx0 = load volatile i32, i32 addrspace(1)* %gep %idx1 = add i32 %idx0, 1 %val0 = extractelement <4 x i32> , i32 %idx0 - %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" () + %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" () %val1 = extractelement <4 x i32> , i32 %idx1 store volatile i32 %val0, i32 addrspace(1)* %out0 store volatile i32 %val1, i32 addrspace(1)* %out0 diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll index 36441cf778c2..c0f5218efc16 100644 --- a/test/CodeGen/AMDGPU/inline-asm.ll +++ b/test/CodeGen/AMDGPU/inline-asm.ll @@ -193,7 +193,7 @@ entry: ; CHECK: use v[0:1] define amdgpu_kernel void @i64_imm_input_phys_vgpr() { entry: - call void asm sideeffect "; use $0 ", "{VGPR0_VGPR1}"(i64 123456) + call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456) ret void } @@ -202,7 +202,7 @@ entry: ; CHECK: ; use v0 define amdgpu_kernel void @i1_imm_input_phys_vgpr() { entry: - call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 true) + call void asm sideeffect "; use $0 ", "{v0}"(i1 true) ret void } @@ -215,7 +215,7 @@ entry: define amdgpu_kernel void @i1_input_phys_vgpr() { entry: %val = load i1, i1 addrspace(1)* undef - call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 %val) + call void asm sideeffect "; use $0 ", "{v0}"(i1 %val) ret void } @@ -229,7 +229,7 @@ define amdgpu_kernel void @i1_input_phys_vgpr_x2() { entry: %val0 = load volatile i1, i1 addrspace(1)* undef %val1 = load volatile i1, i1 addrspace(1)* undef - call void asm sideeffect "; use $0 $1 ", "{VGPR0}, {VGPR1}"(i1 %val0, i1 %val1) + call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1) ret void } @@ -240,8 +240,8 @@ entry: ; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 define amdgpu_kernel void @muliple_def_phys_vgpr() { entry: - %def0 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"() - %def1 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"() + %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"() + %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"() %add = shl i32 %def0, %def1 store i32 %add, i32 addrspace(1)* undef ret void diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index 1edccff3bf15..86fc41a23772 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -261,7 +261,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -285,7 +285,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, ; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { @@ -345,7 +345,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspac ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -369,7 +369,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out ; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] -; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { diff --git a/test/CodeGen/AMDGPU/limit-coalesce.mir b/test/CodeGen/AMDGPU/limit-coalesce.mir index a0d2d6c097a2..7d6d8a5891cd 100644 --- a/test/CodeGen/AMDGPU/limit-coalesce.mir +++ b/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -2,13 +2,13 @@ # Check that coalescer does not create wider register tuple than in source -# CHECK: - { id: 2, class: vreg_64 } -# CHECK: - { id: 3, class: vreg_64 } -# CHECK: - { id: 4, class: vreg_64 } -# CHECK: - { id: 5, class: vreg_96 } -# CHECK: - { id: 6, class: vreg_96 } -# CHECK: - { id: 7, class: vreg_128 } -# CHECK: - { id: 8, class: vreg_128 } +# CHECK: - { id: 2, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 3, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 4, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 5, class: vreg_96, preferred-register: '' } +# CHECK: - { id: 6, class: vreg_96, preferred-register: '' } +# CHECK: - { id: 7, class: vreg_128, preferred-register: '' } +# CHECK: - { id: 8, class: vreg_128, preferred-register: '' } # No more registers shall be defined # CHECK-NEXT: liveins: # CHECK: FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %4, diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll new file mode 100644 index 000000000000..873a3f0f368f --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.alignbit(i32, i32, i32) #0 +declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_alignbit_b32: +; GCN: v_alignbit_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} +define amdgpu_kernel void @v_alignbit_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 { + %val = call i32 @llvm.amdgcn.alignbit(i32 %src1, i32 %src2, i32 %src3) #0 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_alignbyte_b32: +; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} +define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 { + %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) #0 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll index 3a2b87cd87f3..83bc8b234724 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll index a8d03bf6bbac..685b5e0f29c4 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -3,45 +3,56 @@ declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0 -; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 +; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] +define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 - ret void -} - -; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) { %in = load <4 x i32>, <4 x i32> addrspace(1) * %input - - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll index be71225c5e06..1f46613a8db0 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_qsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index eec187390169..806723e5136c 100644 --- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -118,7 +118,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] ; VI-FLUSH: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[B_V2_F16]], v[[C_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]] ; VI-FLUSH-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]] ; VI-FLUSH-NOT: v_and_b32 diff --git a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index a4353d1136e1..8f4b314ffabb 100644 --- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -82,7 +82,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NOT: and ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] @@ -110,7 +110,7 @@ entry: ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 -; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] @@ -138,7 +138,7 @@ entry: ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 -; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index 4875d26fc860..1a86286f7136 100644 --- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -81,7 +81,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NOT: and ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] @@ -111,7 +111,7 @@ entry: ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 -; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] @@ -139,7 +139,7 @@ entry: ; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 -; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index 77d793201adc..49f00e9447da 100644 --- a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -608,11 +608,11 @@ ret: ; GCN: ;;#ASMSTART ; GCN: ; use s[0:1] define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { - call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () #0 - call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () #0 - call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19}"() #0 - call void asm sideeffect "", "~{VGPR20_VGPR21}"() #0 - call void asm sideeffect "", "~{VGPR22}"() #0 + call void asm sideeffect "", "~{v[0:7]}" () #0 + call void asm sideeffect "", "~{v[8:15]}" () #0 + call void asm sideeffect "", "~{v[16:19]}"() #0 + call void asm sideeffect "", "~{v[20:21]}"() #0 + call void asm sideeffect "", "~{v22}"() #0 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll new file mode 100644 index 000000000000..5b2da788a405 --- /dev/null +++ b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll @@ -0,0 +1,131 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck --check-prefix=OPT %s + +; Make sure that array alloca loaded and stored as multi-element aggregates are handled correctly +; Strictly the promote-alloca pass shouldn't have to deal with this case as it is non-canonical, but +; the pass should handle it gracefully if it is +; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt +; should now leave these unchanged + +; OPT-LABEL: @promote_1d_aggr( +; OPT: store [1 x float] %tmp3, [1 x float]* %f1 + +%Block = type { [1 x float], i32 } +%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] } + +@block = external addrspace(1) global %Block +@pv = external addrspace(1) global %gl_PerVertex + +define amdgpu_vs void @promote_1d_aggr() #0 { + %i = alloca i32 + %f1 = alloca [1 x float] + %tmp = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1 + %tmp1 = load i32, i32 addrspace(1)* %tmp + store i32 %tmp1, i32* %i + %tmp2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0 + %tmp3 = load [1 x float], [1 x float] addrspace(1)* %tmp2 + store [1 x float] %tmp3, [1 x float]* %f1 + %tmp4 = load i32, i32* %i + %tmp5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %tmp4 + %tmp6 = load float, float* %tmp5 + %tmp7 = alloca <4 x float> + %tmp8 = load <4 x float>, <4 x float>* %tmp7 + %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0 + %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1 + %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2 + %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3 + %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0 + store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13 + ret void +} + + +; OPT-LABEL: @promote_store_aggr( +; OPT: %tmp6 = load [2 x float], [2 x float]* %f1 + +%Block2 = type { i32, [2 x float] } +@block2 = external addrspace(1) global %Block2 + +define amdgpu_vs void @promote_store_aggr() #0 { + %i = alloca i32 + %f1 = alloca [2 x float] + %tmp = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0 + %tmp1 = load i32, i32 addrspace(1)* %tmp + store i32 %tmp1, i32* %i + %tmp2 = load i32, i32* %i + %tmp3 = sitofp i32 %tmp2 to float + %tmp4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0 + store float %tmp3, float* %tmp4 + %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1 + store float 2.000000e+00, float* %tmp5 + %tmp6 = load [2 x float], [2 x float]* %f1 + %tmp7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1 + store [2 x float] %tmp6, [2 x float] addrspace(1)* %tmp7 + %tmp8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0 + store <4 x float> , <4 x float> addrspace(1)* %tmp8 + ret void +} + +; OPT-LABEL: @promote_load_from_store_aggr( +; OPT: store [2 x float] %tmp3, [2 x float]* %f1 + +%Block3 = type { [2 x float], i32 } +@block3 = external addrspace(1) global %Block3 + +define amdgpu_vs void @promote_load_from_store_aggr() #0 { + %i = alloca i32 + %f1 = alloca [2 x float] + %tmp = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1 + %tmp1 = load i32, i32 addrspace(1)* %tmp + store i32 %tmp1, i32* %i + %tmp2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0 + %tmp3 = load [2 x float], [2 x float] addrspace(1)* %tmp2 + store [2 x float] %tmp3, [2 x float]* %f1 + %tmp4 = load i32, i32* %i + %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %tmp4 + %tmp6 = load float, float* %tmp5 + %tmp7 = alloca <4 x float> + %tmp8 = load <4 x float>, <4 x float>* %tmp7 + %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0 + %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1 + %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2 + %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3 + %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0 + store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13 + ret void +} + +; OPT-LABEL: @promote_double_aggr( +; OPT: store [2 x double] %tmp5, [2 x double]* %s + +@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> } +@frag_color = external addrspace(1) global <4 x float> + +define amdgpu_ps void @promote_double_aggr() #0 { + %s = alloca [2 x double] + %tmp = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0 + %tmp1 = load double, double addrspace(1)* %tmp + %tmp2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1 + %tmp3 = load double, double addrspace(1)* %tmp2 + %tmp4 = insertvalue [2 x double] undef, double %tmp1, 0 + %tmp5 = insertvalue [2 x double] %tmp4, double %tmp3, 1 + store [2 x double] %tmp5, [2 x double]* %s + %tmp6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 + %tmp7 = load double, double* %tmp6 + %tmp8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 + %tmp9 = load double, double* %tmp8 + %tmp10 = fadd double %tmp7, %tmp9 + %tmp11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0 + store double %tmp10, double* %tmp11 + %tmp12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0 + %tmp13 = load double, double* %tmp12 + %tmp14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 + %tmp15 = load double, double* %tmp14 + %tmp16 = fadd double %tmp13, %tmp15 + %tmp17 = fptrunc double %tmp16 to float + %tmp18 = insertelement <4 x float> undef, float %tmp17, i32 0 + %tmp19 = insertelement <4 x float> %tmp18, float %tmp17, i32 1 + %tmp20 = insertelement <4 x float> %tmp19, float %tmp17, i32 2 + %tmp21 = insertelement <4 x float> %tmp20, float %tmp17, i32 3 + store <4 x float> %tmp21, <4 x float> addrspace(1)* @frag_color + ret void +} diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir new file mode 100644 index 000000000000..1a0d68d81f97 --- /dev/null +++ b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir @@ -0,0 +1,69 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s +--- + +# GCN-LABEL: name: mac_invalid_operands +# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec + +name: mac_invalid_operands +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_128 } + - { id: 1, class: vreg_128 } + - { id: 2, class: sgpr_64 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } + - { id: 5, class: vgpr_32 } + - { id: 6, class: vgpr_32 } + - { id: 7, class: sreg_64 } + - { id: 8, class: vgpr_32 } + - { id: 9, class: vgpr_32 } + - { id: 10, class: vreg_64 } + - { id: 11, class: vreg_64 } + - { id: 12, class: vreg_128 } + - { id: 13, class: vreg_128 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vreg_64 } + - { id: 16, class: vgpr_32 } + - { id: 17, class: vreg_128 } +body: | + bb.0: + successors: %bb.2, %bb.1 + + %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec + %vcc = COPY killed %7 + S_CBRANCH_VCCZ %bb.2, implicit killed %vcc + + bb.1: + successors: %bb.3 + + %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec + undef %12.sub0 = COPY killed %4 + %17 = COPY killed %12 + S_BRANCH %bb.3 + + bb.2: + successors: %bb.3 + + %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec + undef %13.sub0 = COPY %8 + %13.sub1 = COPY %8 + %13.sub2 = COPY killed %8 + %0 = COPY killed %13 + %17 = COPY killed %0 + + bb.3: + %1 = COPY killed %17 + FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr + %14 = COPY %1.sub1 + %16 = COPY killed %1.sub0 + undef %15.sub0 = COPY killed %16 + %15.sub1 = COPY killed %14 + FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM + +... diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll index 6ed730ad60f4..abd15f1fb47f 100644 --- a/test/CodeGen/AMDGPU/scratch-simple.ll +++ b/test/CodeGen/AMDGPU/scratch-simple.ll @@ -12,8 +12,10 @@ ; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 ; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] -; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]] -; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]] +; GCN-DAG: v_mov_b32_e32 [[C200:v[0-9]+]], 0x200 +; GCN-DAG: v_mov_b32_e32 [[C400:v[0-9]+]], 0x400 +; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[C200]], [[CLAMP_IDX]] +; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[C400]], [[CLAMP_IDX]] ; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen ; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen diff --git a/test/CodeGen/AMDGPU/sdwa-peephole.ll b/test/CodeGen/AMDGPU/sdwa-peephole.ll index a319edfc5ace..66e166d283f7 100644 --- a/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -74,7 +74,7 @@ entry: ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_HI]], v[[DST_MUL_LO]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) { entry: @@ -97,8 +97,8 @@ entry: ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL2:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL3:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) { entry: @@ -125,10 +125,10 @@ entry: ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL5:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL6:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL7:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL7]], v[[DST_MUL6]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL5]], v[[DST_MUL4]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL6]], v[[DST_MUL7]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL4]], v[[DST_MUL5]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) { entry: @@ -347,8 +347,8 @@ entry: ; NOSDWA-NOT: v_mul_u32_u24_sdwa ; SDWA-DAG: v_mov_b32_e32 v[[M321:[0-9]+]], 0x141 ; SDWA-DAG: v_mov_b32_e32 v[[M123:[0-9]+]], 0x7b -; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M123]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M321]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M123]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M321]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { entry: @@ -367,7 +367,7 @@ entry: ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; NOSDWA-NOT: v_mul_u32_u24_sdwa -; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) { entry: @@ -408,9 +408,9 @@ store_label: ; NOSDWA-NOT: v_and_b32_sdwa ; NOSDWA-NOT: v_or_b32_sdwa -; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} -; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} ; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD diff --git a/test/CodeGen/AMDGPU/shl.v2i16.ll b/test/CodeGen/AMDGPU/shl.v2i16.ll index 115221c5316d..839854fd575b 100644 --- a/test/CodeGen/AMDGPU/shl.v2i16.ll +++ b/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -10,7 +10,7 @@ ; VI: v_lshlrev_b32_e32 ; VI: v_lshlrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_lshlrev_b32_e32 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll index 114c97b61bd4..a57e7b13453f 100644 --- a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -25,50 +25,50 @@ ; SMEM: s_dcache_wb ; ALL: s_endpgm define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () - call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () - call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19_VGPR20_VGPR21_VGPR22_VGPR23}" () - call void asm sideeffect "", "~{VGPR24_VGPR25_VGPR26_VGPR27_VGPR28_VGPR29_VGPR30_VGPR31}" () - call void asm sideeffect "", "~{VGPR32_VGPR33_VGPR34_VGPR35_VGPR36_VGPR37_VGPR38_VGPR39}" () - call void asm sideeffect "", "~{VGPR40_VGPR41_VGPR42_VGPR43_VGPR44_VGPR45_VGPR46_VGPR47}" () - call void asm sideeffect "", "~{VGPR48_VGPR49_VGPR50_VGPR51_VGPR52_VGPR53_VGPR54_VGPR55}" () - call void asm sideeffect "", "~{VGPR56_VGPR57_VGPR58_VGPR59_VGPR60_VGPR61_VGPR62_VGPR63}" () - call void asm sideeffect "", "~{VGPR64_VGPR65_VGPR66_VGPR67_VGPR68_VGPR69_VGPR70_VGPR71}" () - call void asm sideeffect "", "~{VGPR72_VGPR73_VGPR74_VGPR75_VGPR76_VGPR77_VGPR78_VGPR79}" () - call void asm sideeffect "", "~{VGPR80_VGPR81_VGPR82_VGPR83_VGPR84_VGPR85_VGPR86_VGPR87}" () - call void asm sideeffect "", "~{VGPR88_VGPR89_VGPR90_VGPR91_VGPR92_VGPR93_VGPR94_VGPR95}" () - call void asm sideeffect "", "~{VGPR96_VGPR97_VGPR98_VGPR99_VGPR100_VGPR101_VGPR102_VGPR103}" () - call void asm sideeffect "", "~{VGPR104_VGPR105_VGPR106_VGPR107_VGPR108_VGPR109_VGPR110_VGPR111}" () - call void asm sideeffect "", "~{VGPR112_VGPR113_VGPR114_VGPR115_VGPR116_VGPR117_VGPR118_VGPR119}" () - call void asm sideeffect "", "~{VGPR120_VGPR121_VGPR122_VGPR123_VGPR124_VGPR125_VGPR126_VGPR127}" () - call void asm sideeffect "", "~{VGPR128_VGPR129_VGPR130_VGPR131_VGPR132_VGPR133_VGPR134_VGPR135}" () - call void asm sideeffect "", "~{VGPR136_VGPR137_VGPR138_VGPR139_VGPR140_VGPR141_VGPR142_VGPR143}" () - call void asm sideeffect "", "~{VGPR144_VGPR145_VGPR146_VGPR147_VGPR148_VGPR149_VGPR150_VGPR151}" () - call void asm sideeffect "", "~{VGPR152_VGPR153_VGPR154_VGPR155_VGPR156_VGPR157_VGPR158_VGPR159}" () - call void asm sideeffect "", "~{VGPR160_VGPR161_VGPR162_VGPR163_VGPR164_VGPR165_VGPR166_VGPR167}" () - call void asm sideeffect "", "~{VGPR168_VGPR169_VGPR170_VGPR171_VGPR172_VGPR173_VGPR174_VGPR175}" () - call void asm sideeffect "", "~{VGPR176_VGPR177_VGPR178_VGPR179_VGPR180_VGPR181_VGPR182_VGPR183}" () - call void asm sideeffect "", "~{VGPR184_VGPR185_VGPR186_VGPR187_VGPR188_VGPR189_VGPR190_VGPR191}" () - call void asm sideeffect "", "~{VGPR192_VGPR193_VGPR194_VGPR195_VGPR196_VGPR197_VGPR198_VGPR199}" () - call void asm sideeffect "", "~{VGPR200_VGPR201_VGPR202_VGPR203_VGPR204_VGPR205_VGPR206_VGPR207}" () - call void asm sideeffect "", "~{VGPR208_VGPR209_VGPR210_VGPR211_VGPR212_VGPR213_VGPR214_VGPR215}" () - call void asm sideeffect "", "~{VGPR216_VGPR217_VGPR218_VGPR219_VGPR220_VGPR221_VGPR222_VGPR223}" () - call void asm sideeffect "", "~{VGPR224_VGPR225_VGPR226_VGPR227_VGPR228_VGPR229_VGPR230_VGPR231}" () - call void asm sideeffect "", "~{VGPR232_VGPR233_VGPR234_VGPR235_VGPR236_VGPR237_VGPR238_VGPR239}" () - call void asm sideeffect "", "~{VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247}" () - call void asm sideeffect "", "~{VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{v[0:7]}" () + call void asm sideeffect "", "~{v[8:15]}" () + call void asm sideeffect "", "~{v[16:23]}" () + call void asm sideeffect "", "~{v[24:31]}" () + call void asm sideeffect "", "~{v[32:39]}" () + call void asm sideeffect "", "~{v[40:47]}" () + call void asm sideeffect "", "~{v[48:55]}" () + call void asm sideeffect "", "~{v[56:63]}" () + call void asm sideeffect "", "~{v[64:71]}" () + call void asm sideeffect "", "~{v[72:79]}" () + call void asm sideeffect "", "~{v[80:87]}" () + call void asm sideeffect "", "~{v[88:95]}" () + call void asm sideeffect "", "~{v[96:103]}" () + call void asm sideeffect "", "~{v[104:111]}" () + call void asm sideeffect "", "~{v[112:119]}" () + call void asm sideeffect "", "~{v[120:127]}" () + call void asm sideeffect "", "~{v[128:135]}" () + call void asm sideeffect "", "~{v[136:143]}" () + call void asm sideeffect "", "~{v[144:151]}" () + call void asm sideeffect "", "~{v[152:159]}" () + call void asm sideeffect "", "~{v[160:167]}" () + call void asm sideeffect "", "~{v[168:175]}" () + call void asm sideeffect "", "~{v[176:183]}" () + call void asm sideeffect "", "~{v[184:191]}" () + call void asm sideeffect "", "~{v[192:199]}" () + call void asm sideeffect "", "~{v[200:207]}" () + call void asm sideeffect "", "~{v[208:215]}" () + call void asm sideeffect "", "~{v[216:223]}" () + call void asm sideeffect "", "~{v[224:231]}" () + call void asm sideeffect "", "~{v[232:239]}" () + call void asm sideeffect "", "~{v[240:247]}" () + call void asm sideeffect "", "~{v[248:255]}" () store i32 %in, i32 addrspace(1)* %out ret void diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll index 3f53572ab440..ea8b87f1dee2 100644 --- a/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -79,7 +79,7 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 { ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 { call void @llvm.AMDGPU.kill(float %x) - %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"() + %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"() call void @llvm.AMDGPU.kill(float %y) ret void } @@ -128,7 +128,7 @@ bb: v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() + v_nop_e64", "={v7}"() call void @llvm.AMDGPU.kill(float %var) br label %exit @@ -186,11 +186,11 @@ bb: v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() - %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"() + v_nop_e64", "={v7}"() + %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"() call void @llvm.AMDGPU.kill(float %var) store volatile float %live.across, float addrspace(1)* undef - %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"() + %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"() br label %exit exit: @@ -242,7 +242,7 @@ bb: v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() + v_nop_e64", "={v7}"() call void @llvm.AMDGPU.kill(float %var) %vgpr = load volatile i32, i32 addrspace(1)* undef %loop.cond = icmp eq i32 %vgpr, 0 diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll index 5d71ad2c8ba3..a9aac2d8abb7 100644 --- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -10,11 +10,11 @@ ; VI: v_sub_i32_e32 ; VI-DAG: v_sub_i32_e32 -; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; VI: v_add_i32_e32 ; VI: v_add_i32_e32 -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_sub_i32_e32 ; CI-DAG: v_sub_i32_e32 @@ -47,7 +47,7 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> % ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}} -; VI: v_add_u16_sdwa v{{[0-9]+}}, [[TWO]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[TWO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_and_b32 ; VI: v_or_b32_e32 define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 { diff --git a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll index c05021a91ff0..a23461a0a514 100644 --- a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -20,13 +20,13 @@ entry: %a = load <1280 x i32>, <1280 x i32> addrspace(1)* %aptr ; mark most VGPR registers as used to increase register pressure - call void asm sideeffect "", "~{VGPR4},~{VGPR8},~{VGPR12},~{VGPR16},~{VGPR20},~{VGPR24},~{VGPR28},~{VGPR32}" () - call void asm sideeffect "", "~{VGPR36},~{VGPR40},~{VGPR44},~{VGPR48},~{VGPR52},~{VGPR56},~{VGPR60},~{VGPR64}" () - call void asm sideeffect "", "~{VGPR68},~{VGPR72},~{VGPR76},~{VGPR80},~{VGPR84},~{VGPR88},~{VGPR92},~{VGPR96}" () - call void asm sideeffect "", "~{VGPR100},~{VGPR104},~{VGPR108},~{VGPR112},~{VGPR116},~{VGPR120},~{VGPR124},~{VGPR128}" () - call void asm sideeffect "", "~{VGPR132},~{VGPR136},~{VGPR140},~{VGPR144},~{VGPR148},~{VGPR152},~{VGPR156},~{VGPR160}" () - call void asm sideeffect "", "~{VGPR164},~{VGPR168},~{VGPR172},~{VGPR176},~{VGPR180},~{VGPR184},~{VGPR188},~{VGPR192}" () - call void asm sideeffect "", "~{VGPR196},~{VGPR200},~{VGPR204},~{VGPR208},~{VGPR212},~{VGPR216},~{VGPR220},~{VGPR224}" () + call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" () + call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" () + call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" () + call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" () + call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" () + call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" () + call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" () %outptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %out, i32 %tid store <1280 x i32> %a, <1280 x i32> addrspace(1)* %outptr diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll index 6aeff3fc3b6c..ee923e2b8b61 100644 --- a/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -5,7 +5,7 @@ ; GCN-LABEL: {{^}}v_test_sub_v2i16: ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_subrev_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -62,7 +62,7 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]] ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfffffe38 -; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xffffff85, v{{[0-9]+}} define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -80,7 +80,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]] ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3df -; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x34d, v{{[0-9]+}} define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -98,7 +98,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* ; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]] ; VI: flat_load_ushort [[LOAD1:v[0-9]+]] -; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[ONE]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], [[ONE]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD1]] ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { @@ -137,7 +137,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac ; VI-NOT: v_subrev_i16 ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080 -; VI: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_subrev_i16 ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { @@ -252,7 +252,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1) ; GFX9: v_pk_sub_i16 ; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} -; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_subrev_u16_e32 ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 diff --git a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll index 3e80fcf85b52..1e08f51dabde 100644 --- a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -73,14 +73,14 @@ bb11: ; preds = %bb9 ; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}} define amdgpu_kernel void @partially_undef_copy() #0 { - %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={VGPR5}"() - %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={VGPR6}"() + %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={v5}"() + %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={v6}"() %partially.undef.0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0 %partially.undef.1 = insertelement <4 x i32> %partially.undef.0, i32 %tmp1, i32 0 store volatile <4 x i32> %partially.undef.1, <4 x i32> addrspace(1)* undef, align 16 - tail call void asm sideeffect "v_nop", "v={VGPR5_VGPR6_VGPR7_VGPR8}"(<4 x i32> %partially.undef.0) + tail call void asm sideeffect "v_nop", "v={v[5:8]}"(<4 x i32> %partially.undef.0) ret void } diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll index 3da1a0324042..ce4a69db3506 100644 --- a/test/CodeGen/AMDGPU/v_mac_f16.ll +++ b/test/CodeGen/AMDGPU/v_mac_f16.ll @@ -304,14 +304,14 @@ entry: ; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] -; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] -; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]] -; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] +; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] +; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] +; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] +; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]] +; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]] ; SI-DAG: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]] ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]] ; SI-DAG: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]] @@ -320,12 +320,12 @@ entry: ; VI-NOT: and ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] -; VI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]] -; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]] +; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] +; VI-DAG: v_mac_f16_sdwa v[[C_F16_1]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]] +; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]] ; VI-NOT: and -; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[A_V2_F16]] +; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[C_V2_F16]] ; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]] ; GCN: s_endpgm @@ -336,7 +336,9 @@ define amdgpu_kernel void @mac_v2f16( <2 x half> addrspace(1)* %c) #0 { entry: %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + call void @llvm.amdgcn.s.barrier() #2 %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + call void @llvm.amdgcn.s.barrier() #2 %c.val = load <2 x half>, <2 x half> addrspace(1)* %c %t.val = fmul <2 x half> %a.val, %b.val @@ -485,7 +487,7 @@ entry: ; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; VI-DAG: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} ; VI-DAG: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]] ; GCN: s_endpgm @@ -517,7 +519,7 @@ entry: ; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} ; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}} ; GCN: s_endpgm @@ -670,5 +672,8 @@ entry: ret void } +declare void @llvm.amdgcn.s.barrier() #2 + attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" } attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" } +attributes #2 = { nounwind convergent } diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir new file mode 100644 index 000000000000..d7f208d4cf59 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir @@ -0,0 +1,149 @@ +# RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- | + define void @test_mla() #0 { ret void } + define void @test_mla_v5() #1 { ret void } + + define void @test_mls() #2 { ret void } + define void @test_no_mls() { ret void } + + attributes #0 = { "target-features"="+v6" } + attributes #1 = { "target-features"="-v6" } + attributes #2 = { "target-features"="+v6t2" } +... +--- +name: test_mla +# CHECK-LABEL: name: test_mla +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_ADD %3, %2 + ; CHECK: [[VREGR:%[0-9]+]] = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mla_v5 +# CHECK-LABEL: name: test_mla_v5 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_ADD %3, %2 + ; CHECK: [[VREGR:%[0-9]+]] = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mls +# CHECK-LABEL: name: test_mls +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGR:%[0-9]+]] = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_no_mls +# CHECK-LABEL: name: test_no_mls +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGM:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]] = SUBrr [[VREGZ]], [[VREGM]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index 72c3b715d36e..16642d85d9cf 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -28,6 +28,10 @@ define void @test_sdiv_s32() #2 { ret void } define void @test_udiv_s32() #2 { ret void } + define void @test_and_s32() { ret void } + define void @test_or_s32() { ret void } + define void @test_xor_s32() { ret void } + define void @test_load_from_stack() { ret void } define void @test_load_f32() #0 { ret void } define void @test_load_f64() #0 { ret void } @@ -783,6 +787,105 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_AND %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = ANDrr [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_OR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = ORRrr [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_XOR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = EORrr [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- name: test_load_from_stack # CHECK-LABEL: name: test_load_from_stack legalized: true @@ -802,8 +905,8 @@ fixedStack: - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false } - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false } -# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0 -# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8 +# CHECK-DAG: id: [[FI1:[0-9]+]], type: default, offset: 0, size: 1 +# CHECK-DAG: id: [[FI32:[0-9]+]], type: default, offset: 8 body: | bb.0: liveins: %r0, %r1, %r2, %r3 @@ -1024,13 +1127,11 @@ body: | %1(s32) = COPY %r3 ; CHECK: [[IN2:%[0-9]+]] = COPY %r3 - %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 1 + %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) ; CHECK: %[[DREG]] = VMOVDRR [[IN1]], [[IN2]] - %3(s32) = G_EXTRACT %2(s64), 0 - %4(s32) = G_EXTRACT %2(s64), 32 - ; CHECK: [[OUT1:%[0-9]+]] = VGETLNi32 %[[DREG]], 0 - ; CHECK: [[OUT2:%[0-9]+]] = VGETLNi32 %[[DREG]], 1 + %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64) + ; CHECK: [[OUT1:%[0-9]+]], [[OUT2:%[0-9]+]] = VMOVRRD %[[DREG]] %r0 = COPY %3 ; CHECK: %r0 = COPY [[OUT1]] diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index 1c7769894a27..05902c22fb98 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE -; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG +; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE +; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG define void @test_void_return() { ; CHECK-LABEL: name: test_void_return @@ -329,15 +329,13 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p ; CHECK: liveins: %r0, %r1, %r2, %r3 ; CHECK-DAG: [[VREGP1LO:%[0-9]+]](s32) = COPY %r2 ; CHECK-DAG: [[VREGP1HI:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1LO]](s32), 0, [[VREGP1HI]](s32), 32 -; BIG: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1HI]](s32), 0, [[VREGP1LO]](s32), 32 +; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1LO]](s32), [[VREGP1HI]](s32) +; BIG: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1HI]](s32), [[VREGP1LO]](s32) ; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] ; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 -; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -376,15 +374,13 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0, ; CHECK: liveins: %r0, %r2, %r3 ; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r2 ; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32 -; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32 +; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) +; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] ; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 -; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -401,15 +397,13 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler, ; CHECK: liveins: %r0, %r1, %r2 ; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32 -; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32 +; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) +; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] ; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 -; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -420,9 +414,11 @@ entry: define arm_aapcscc void @test_indirect_call(void() *%fptr) { ; CHECK-LABEL: name: test_indirect_call -; CHECK: [[FPTR:%[0-9]+]](p0) = COPY %r0 +; CHECK: registers: +; CHECK-NEXT: id: [[FPTR:[0-9]+]], class: gpr +; CHECK: %[[FPTR]](p0) = COPY %r0 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BLX [[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp +; CHECK: BLX %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp entry: notail call arm_aapcscc void %fptr() @@ -566,13 +562,12 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { ; CHECK-LABEL: name: test_call_aapcs_fp_params ; CHECK-DAG: [[A1:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[A2:%[0-9]+]](s32) = COPY %r1 -; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A1]](s32), 0, [[A2]](s32), 32 -; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A2]](s32), 0, [[A1]](s32), 32 +; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A1]](s32), [[A2]](s32) +; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32) ; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r2 ; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK-DAG: %r0 = COPY [[BVREG]] -; CHECK-DAG: [[A1:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 0 -; CHECK-DAG: [[A2:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 32 +; CHECK-DAG: [[A1:%[0-9]+]](s32), [[A2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[AVREG]](s64) ; LITTLE-DAG: %r2 = COPY [[A1]] ; LITTLE-DAG: %r3 = COPY [[A2]] ; BIG-DAG: %r2 = COPY [[A2]] @@ -588,11 +583,10 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { ; CHECK: BLX @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R1]](s32), 0, [[R2]](s32), 32 -; BIG: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R2]](s32), 0, [[R1]](s32), 32 +; LITTLE: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R1]](s32), [[R2]](s32) +; BIG: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R2]](s32), [[R1]](s32) ; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 0 -; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 32 +; CHECK: [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RVREG]](s64) ; LITTLE-DAG: %r0 = COPY [[R1]] ; LITTLE-DAG: %r1 = COPY [[R2]] ; BIG-DAG: %r0 = COPY [[R2]] @@ -702,8 +696,8 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { ; CHECK: fixedStack: ; The parameters live in separate stack locations, one for each element that ; doesn't fit in the registers. -; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], offset: 0, size: 4 -; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], offset: 60, size: 4 +; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4, +; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], type: default, offset: 60, size: 4 ; CHECK: liveins: %r0, %r1, %r2, %r3 ; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1 @@ -755,16 +749,16 @@ declare arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double]) define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; CHECK-LABEL: name: test_fp_arrays_aapcs ; CHECK: fixedStack: -; CHECK: id: [[ARR2_ID:[0-9]+]], offset: 0, size: 8 +; CHECK: id: [[ARR2_ID:[0-9]+]], type: default, offset: 0, size: 8, ; CHECK: liveins: %r0, %r1, %r2, %r3 ; CHECK: [[ARR0_0:%[0-9]+]](s32) = COPY %r0 ; CHECK: [[ARR0_1:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[ARR0:%[0-9]+]](s64) = G_SEQUENCE [[ARR0_0]](s32), 0, [[ARR0_1]](s32), 32 -; BIG: [[ARR0:%[0-9]+]](s64) = G_SEQUENCE [[ARR0_1]](s32), 0, [[ARR0_0]](s32), 32 +; LITTLE: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_0]](s32), [[ARR0_1]](s32) +; BIG: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_1]](s32), [[ARR0_0]](s32) ; CHECK: [[ARR1_0:%[0-9]+]](s32) = COPY %r2 ; CHECK: [[ARR1_1:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[ARR1:%[0-9]+]](s64) = G_SEQUENCE [[ARR1_0]](s32), 0, [[ARR1_1]](s32), 32 -; BIG: [[ARR1:%[0-9]+]](s64) = G_SEQUENCE [[ARR1_1]](s32), 0, [[ARR1_0]](s32), 32 +; LITTLE: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_0]](s32), [[ARR1_1]](s32) +; BIG: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32) ; CHECK: [[ARR2_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] ; CHECK: [[ARR2:%[0-9]+]](s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] ; CHECK: [[ARR_MERGED_0:%[0-9]+]](s192) = IMPLICIT_DEF @@ -776,14 +770,12 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; CHECK: [[ARR0:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 0 ; CHECK: [[ARR1:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 64 ; CHECK: [[ARR2:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 128 -; CHECK: [[ARR0_0:%[0-9]+]](s32) = G_EXTRACT [[ARR0]](s64), 0 -; CHECK: [[ARR0_1:%[0-9]+]](s32) = G_EXTRACT [[ARR0]](s64), 32 +; CHECK: [[ARR0_0:%[0-9]+]](s32), [[ARR0_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR0]](s64) ; LITTLE: %r0 = COPY [[ARR0_0]](s32) ; LITTLE: %r1 = COPY [[ARR0_1]](s32) ; BIG: %r0 = COPY [[ARR0_1]](s32) ; BIG: %r1 = COPY [[ARR0_0]](s32) -; CHECK: [[ARR1_0:%[0-9]+]](s32) = G_EXTRACT [[ARR1]](s64), 0 -; CHECK: [[ARR1_1:%[0-9]+]](s32) = G_EXTRACT [[ARR1]](s64), 32 +; CHECK: [[ARR1_0:%[0-9]+]](s32), [[ARR1_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR1]](s64) ; LITTLE: %r2 = COPY [[ARR1_0]](s32) ; LITTLE: %r3 = COPY [[ARR1_1]](s32) ; BIG: %r2 = COPY [[ARR1_1]](s32) @@ -815,10 +807,10 @@ declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3 define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) { ; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp ; CHECK: fixedStack: -; CHECK-DAG: id: [[Z0_ID:[0-9]+]], offset: 0, size: 8 -; CHECK-DAG: id: [[Z1_ID:[0-9]+]], offset: 8, size: 8 -; CHECK-DAG: id: [[Z2_ID:[0-9]+]], offset: 16, size: 8 -; CHECK-DAG: id: [[Z3_ID:[0-9]+]], offset: 24, size: 8 +; CHECK-DAG: id: [[Z0_ID:[0-9]+]], type: default, offset: 0, size: 8, +; CHECK-DAG: id: [[Z1_ID:[0-9]+]], type: default, offset: 8, size: 8, +; CHECK-DAG: id: [[Z2_ID:[0-9]+]], type: default, offset: 16, size: 8, +; CHECK-DAG: id: [[Z3_ID:[0-9]+]], type: default, offset: 24, size: 8, ; CHECK: liveins: %d0, %d1, %d2, %s6, %s7, %s8 ; CHECK: [[X0:%[0-9]+]](s64) = COPY %d0 ; CHECK: [[X1:%[0-9]+]](s64) = COPY %d1 @@ -916,8 +908,8 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK: fixedStack: ; The parameters live in separate stack locations, one for each element that ; doesn't fit in the registers. -; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], offset: 0, size: 4 -; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], offset: 76, size: 4 +; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4, +; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], type: default, offset: 76, size: 4 ; CHECK: liveins: %r0, %r1, %r2, %r3 ; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1 @@ -979,8 +971,8 @@ declare arm_aapcscc {i32, i32} @structs_target({i32, i32}, {i32*, float, i32, do define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x, {i32*, float, i32, double} %y) { ; CHECK-LABEL: test_structs ; CHECK: fixedStack: -; CHECK-DAG: id: [[Y2_ID:[0-9]+]], offset: 0, size: 4 -; CHECK-DAG: id: [[Y3_ID:[0-9]+]], offset: 8, size: 8 +; CHECK-DAG: id: [[Y2_ID:[0-9]+]], type: default, offset: 0, size: 4, +; CHECK-DAG: id: [[Y3_ID:[0-9]+]], type: default, offset: 8, size: 8, ; CHECK: liveins: %r0, %r1, %r2, %r3 ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 57ccff90c0bb..6ddc29a3bbba 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -153,6 +153,87 @@ entry: ret i32 %sum } +define i8 @test_and_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_and_i8: +; CHECK: and r0, r0, r1 +; CHECK: bx lr +entry: + %sum = and i8 %x, %y + ret i8 %sum +} + +define i16 @test_and_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_and_i16: +; CHECK: and r0, r0, r1 +; CHECK: bx lr +entry: + %sum = and i16 %x, %y + ret i16 %sum +} + +define i32 @test_and_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_and_i32: +; CHECK: and r0, r0, r1 +; CHECK: bx lr +entry: + %sum = and i32 %x, %y + ret i32 %sum +} + +define i8 @test_or_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_or_i8: +; CHECK: orr r0, r0, r1 +; CHECK: bx lr +entry: + %sum = or i8 %x, %y + ret i8 %sum +} + +define i16 @test_or_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_or_i16: +; CHECK: orr r0, r0, r1 +; CHECK: bx lr +entry: + %sum = or i16 %x, %y + ret i16 %sum +} + +define i32 @test_or_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_or_i32: +; CHECK: orr r0, r0, r1 +; CHECK: bx lr +entry: + %sum = or i32 %x, %y + ret i32 %sum +} + +define i8 @test_xor_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_xor_i8: +; CHECK: eor r0, r0, r1 +; CHECK: bx lr +entry: + %sum = xor i8 %x, %y + ret i8 %sum +} + +define i16 @test_xor_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_xor_i16: +; CHECK: eor r0, r0, r1 +; CHECK: bx lr +entry: + %sum = xor i16 %x, %y + ret i16 %sum +} + +define i32 @test_xor_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_xor_i32: +; CHECK: eor r0, r0, r1 +; CHECK: bx lr +entry: + %sum = xor i32 %x, %y + ret i32 %sum +} + define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { ; CHECK-LABEL: test_stack_args_i32: ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4 @@ -272,8 +353,7 @@ define arm_aapcscc double @test_double_softfp(double %f0, double %f1) { ; CHECK-DAG: vmov [[F0:d[0-9]+]], r0, r1 ; CHECK-DAG: vmov [[F1:d[0-9]+]], r2, r3 ; CHECK: vadd.f64 [[FV:d[0-9]+]], [[F0]], [[F1]] -; CHECK: vmov.32 r0, [[FV]][0] -; CHECK: vmov.32 r1, [[FV]][1] +; CHECK: vmov r0, r1, [[FV]] ; CHECK: bx lr entry: %v = fadd double %f0, %f1 diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir index d154b4887c19..803135ba595e 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -82,10 +82,10 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] - %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; CHECK: ADJCALLSTACKDOWN ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] @@ -97,8 +97,7 @@ body: | ; HARD: BLX $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP %6(s64) = G_FREM %4, %5 - %7(s32) = G_EXTRACT %6(s64), 0 - %8(s32) = G_EXTRACT %6(s64), 32 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) BX_RET 14, _, implicit %r0, implicit %r1 @@ -174,10 +173,10 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] - %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; CHECK: ADJCALLSTACKDOWN ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] @@ -189,8 +188,7 @@ body: | ; HARD: BLX $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP %6(s64) = G_FPOW %4, %5 - %7(s32) = G_EXTRACT %6(s64), 0 - %8(s32) = G_EXTRACT %6(s64), 32 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) BX_RET 14, _, implicit %r0, implicit %r1 @@ -258,10 +256,10 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] - %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; HARD: [[R:%[0-9]+]](s64) = G_FADD [[X]], [[Y]] ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] @@ -272,10 +270,8 @@ body: | ; SOFT-DEFAULT: BLX $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; SOFT: ADJCALLSTACKUP %6(s64) = G_FADD %4, %5 - ; HARD-DAG: G_EXTRACT [[R]](s64), 0 - ; HARD-DAG: G_EXTRACT [[R]](s64), 32 - %7(s32) = G_EXTRACT %6(s64), 0 - %8(s32) = G_EXTRACT %6(s64), 32 + ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64) + %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) BX_RET 14, _, implicit %r0, implicit %r1 diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index f6ac92597cb2..c6f6ca81c279 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -15,6 +15,18 @@ define void @test_mul_s16() { ret void } define void @test_mul_s32() { ret void } + define void @test_and_s8() { ret void } + define void @test_and_s16() { ret void } + define void @test_and_s32() { ret void } + + define void @test_or_s8() { ret void } + define void @test_or_s16() { ret void } + define void @test_or_s32() { ret void } + + define void @test_xor_s8() { ret void } + define void @test_xor_s16() { ret void } + define void @test_xor_s32() { ret void } + define void @test_load_from_stack() { ret void } define void @test_legal_loads() #0 { ret void } define void @test_legal_stores() #0 { ret void } @@ -299,6 +311,234 @@ body: | %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 +... +--- +name: test_and_s8 +# CHECK-LABEL: name: test_and_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_AND %0, %1 + ; G_AND with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_AND {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_and_s16 +# CHECK-LABEL: name: test_and_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_AND %0, %1 + ; G_AND with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_AND {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_AND %0, %1 + ; G_AND with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_or_s8 +# CHECK-LABEL: name: test_or_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_OR %0, %1 + ; G_OR with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_OR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_or_s16 +# CHECK-LABEL: name: test_or_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_OR %0, %1 + ; G_OR with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_OR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_OR %0, %1 + ; G_OR with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_xor_s8 +# CHECK-LABEL: name: test_xor_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_XOR %0, %1 + ; G_XOR with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_XOR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_xor_s16 +# CHECK-LABEL: name: test_xor_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_XOR %0, %1 + ; G_XOR with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_XOR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_XOR %0, %1 + ; G_XOR with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + ... --- name: test_load_from_stack @@ -317,7 +557,7 @@ fixedStack: - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false } - # CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8 + # CHECK: id: [[FRAME_INDEX:[0-9]+]], type: default, offset: 8 body: | bb.0: liveins: %r0, %r1, %r2, %r3 diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index dfccc47c277c..cc1df80c6019 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -16,6 +16,10 @@ define void @test_sdiv_s32() #1 { ret void } define void @test_udiv_s32() #1 { ret void } + define void @test_and_s32() { ret void} + define void @test_or_s32() { ret void} + define void @test_xor_s32() { ret void} + define void @test_loads() #0 { ret void } define void @test_stores() #0 { ret void } @@ -45,9 +49,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -71,12 +75,12 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -106,12 +110,12 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -141,12 +145,12 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -176,9 +180,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -202,12 +206,12 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -237,12 +241,12 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -272,9 +276,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -298,12 +302,12 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -333,12 +337,12 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -368,9 +372,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -394,9 +398,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -412,6 +416,84 @@ body: | %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 +... +--- +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_AND %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_OR %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_XOR %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + ... --- name: test_loads @@ -420,13 +502,13 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } -# CHECK: - { id: 6, class: fprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } +# CHECK: - { id: 6, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -456,13 +538,13 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } -# CHECK: - { id: 6, class: fprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } +# CHECK: - { id: 6, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -498,11 +580,11 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -531,9 +613,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -556,7 +638,7 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -572,8 +654,8 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -593,8 +675,8 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -614,8 +696,8 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -635,9 +717,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: fprb } -# CHECK: - { id: 1, class: fprb } -# CHECK: - { id: 2, class: fprb } +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -661,9 +743,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: fprb } -# CHECK: - { id: 1, class: fprb } -# CHECK: - { id: 2, class: fprb } +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -687,11 +769,11 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: fprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -705,9 +787,8 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %3(s32) = G_EXTRACT %2(s64), 0 - %4(s32) = G_EXTRACT %2(s64), 32 + %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64) %r0 = COPY %3(s32) %r1 = COPY %4(s32) BX_RET 14, _, implicit %r0, implicit %r1 diff --git a/test/CodeGen/ARM/clang-section.ll b/test/CodeGen/ARM/clang-section.ll new file mode 100644 index 000000000000..343f0e721d7f --- /dev/null +++ b/test/CodeGen/ARM/clang-section.ll @@ -0,0 +1,140 @@ +;RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s +;Test that global variables and functions are assigned to correct sections. + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7-arm-none-eabi" + +@a = global i32 0, align 4 #0 +@b = global i32 1, align 4 #0 +@c = global [4 x i32] zeroinitializer, align 4 #0 +@d = global [5 x i16] zeroinitializer, align 2 #0 +@e = global [6 x i16] [i16 0, i16 0, i16 1, i16 0, i16 0, i16 0], align 2 #0 +@f = constant i32 2, align 4 #0 +@h = global i32 0, align 4 #1 +@i = global i32 0, align 4 #2 +@j = constant i32 4, align 4 #2 +@k = global i32 0, align 4 #2 +@_ZZ3gooE7lstat_h = internal global i32 0, align 4 #2 +@_ZL1g = internal global [2 x i32] zeroinitializer, align 4 #0 +@l = global i32 5, align 4 #3 +@m = constant i32 6, align 4 #3 +@n = global i32 0, align 4 +@o = global i32 6, align 4 +@p = constant i32 7, align 4 + +; Function Attrs: noinline nounwind +define i32 @foo() #4 { +entry: + %0 = load i32, i32* @b, align 4 + ret i32 %0 +} + +; Function Attrs: noinline +define i32 @goo() #5 { +entry: + %call = call i32 @zoo(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZL1g, i32 0, i32 0), i32* @_ZZ3gooE7lstat_h) + ret i32 %call +} + +declare i32 @zoo(i32*, i32*) #6 + +; Function Attrs: noinline nounwind +define i32 @hoo() #7 { +entry: + %0 = load i32, i32* @b, align 4 + ret i32 %0 +} + +attributes #0 = { "bss-section"="my_bss.1" "data-section"="my_data.1" "rodata-section"="my_rodata.1" } +attributes #1 = { "data-section"="my_data.1" "rodata-section"="my_rodata.1" } +attributes #2 = { "bss-section"="my_bss.2" "rodata-section"="my_rodata.1" } +attributes #3 = { "bss-section"="my_bss.2" "data-section"="my_data.2" "rodata-section"="my_rodata.2" } +attributes #4 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.1" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.2" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"static_rwdata", i32 1} +!2 = !{i32 1, !"enumsize_buildattr", i32 2} +!3 = !{i32 1, !"armlib_unavailable", i32 0} + +;CHECK: .section my_text.1,"ax",%progbits +;CHECK: .type foo,%function +;CHECK: foo: + +;CHECK: .section my_text.2,"ax",%progbits +;CHECK: .type goo,%function +;CHECK: goo: + +;CHECK: .text +;CHECK: .type hoo,%function +;CHECK: hoo: + +;CHECK: .type a,%object +;CHECK: .section my_bss.1,"aw",%nobits +;CHECK: a: + +;CHECK: .type b,%object +;CHECK: .section my_data.1,"aw",%progbits +;CHECK: b: + +;CHECK: .type c,%object +;CHECK: .section my_bss.1,"aw",%nobits +;CHECK: c: + +;CHECK: .type d,%object +;CHECK: d: + +;CHECK: .type e,%object +;CHECK: .section my_data.1,"aw",%progbits +;CHECK: e: + +;CHECK: .type f,%object +;CHECK: .section my_rodata.1,"a",%progbits +;CHECK: f: + +;CHECK: .type h,%object +;CHECK: .bss +;CHECK: h: + +;CHECK: .type i,%object +;CHECK: .section my_bss.2,"aw",%nobits +;CHECK: i: + +;CHECK: .type j,%object +;CHECK: .section my_rodata.1,"a",%progbits +;CHECK: j: + +;CHECK: .type k,%object +;CHECK: .section my_bss.2,"aw",%nobits +;CHECK: k: + +;CHECK: .type _ZZ3gooE7lstat_h,%object @ @_ZZ3gooE7lstat_h +;CHECK: _ZZ3gooE7lstat_h: + +;CHECK: .type _ZL1g,%object +;CHECK: .section my_bss.1,"aw",%nobits +;CHECK: _ZL1g: + +;CHECK: .type l,%object +;CHECK: .section my_data.2,"aw",%progbits +;CHECK: l: + +;CHECK: .type m,%object +;CHECK: .section my_rodata.2,"a",%progbits +;CHECK: m: + +;CHECK: .type n,%object +;CHECK: .bss +;CHECK: n: + +;CHECK: .type o,%object +;CHECK: .data +;CHECK: o: + +;CHECK: .type p,%object +;CHECK: .section .rodata,"a",%progbits +;CHECK: p: diff --git a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll index a9223e1e2a99..5f914323861a 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST ; Check latencies of vmul/vfma accumulate chains. define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { @@ -14,7 +15,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float ; > VMULS read-advanced latency to VMLAS = 0 ; CHECK-SAME: Latency=0 -; CHECK: VMLAS +; CHECK-DEFAULT: VMLAS +; CHECK-FAST: VFMAS ; > VMLAS common latency = 9 ; CHECK: Latency : 9 ; CHECK: Successors: @@ -22,7 +24,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float ; > VMLAS read-advanced latency to the next VMLAS = 4 ; CHECK-SAME: Latency=4 -; CHECK: VMLAS +; CHECK-DEFAULT: VMLAS +; CHECK-FAST: VFMAS ; CHECK: Latency : 9 ; CHECK: Successors: ; CHECK: data @@ -51,7 +54,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 ; VMULfd read-advanced latency to VMLAfd = 0 ; CHECK-SAME: Latency=0 -; CHECK: VMLAfd +; CHECK-DEFAULT: VMLAfd +; CHECK-FAST: VFMAfd ; > VMLAfd common latency = 9 ; CHECK: Latency : 9 ; CHECK: Successors: @@ -59,7 +63,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 ; > VMLAfd read-advanced latency to the next VMLAfd = 4 ; CHECK-SAME: Latency=4 -; CHECK: VMLAfd +; CHECK-DEFAULT: VMLAfd +; CHECK-FAST: VFMAfd ; CHECK: Latency : 9 ; CHECK: Successors: ; CHECK: data @@ -75,3 +80,79 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 ret <2 x float> %add2 } +define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test3:BB#0 + +; CHECK: VMULS +; > VMULS common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; > VMULS read-advanced latency to VMLSS = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLSS +; CHECK-FAST: VFMSS +; > VMLSS common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSS read-advanced latency to the next VMLSS = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLSS +; CHECK-FAST: VFMSS +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSS not-optimized latency to VMOVRS = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS + %mul1 = fmul float %f1, %f2 + %mul2 = fmul float %f3, %f4 + %mul3 = fmul float %f5, %f6 + %sub1 = fsub float %mul1, %mul2 + %sub2 = fsub float %sub1, %mul3 + ret float %sub2 +} + +; ASIMD form +define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test4:BB#0 + +; CHECK: VMULfd +; > VMULfd common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; VMULfd read-advanced latency to VMLSfd = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLSfd +; CHECK-FAST: VFMSfd +; > VMLSfd common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSfd read-advanced latency to the next VMLSfd = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLSfd +; CHECK-FAST: VFMSfd +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSfd not-optimized latency to VMOVRRD = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS + %mul1 = fmul <2 x float> %f1, %f2 + %mul2 = fmul <2 x float> %f3, %f4 + %mul3 = fmul <2 x float> %f5, %f6 + %sub1 = fsub <2 x float> %mul1, %mul2 + %sub2 = fsub <2 x float> %sub1, %mul3 + ret <2 x float> %sub2 +} diff --git a/test/CodeGen/ARM/invalidated-save-point.ll b/test/CodeGen/ARM/invalidated-save-point.ll index 0ff153b6799d..bb602308a179 100644 --- a/test/CodeGen/ARM/invalidated-save-point.ll +++ b/test/CodeGen/ARM/invalidated-save-point.ll @@ -4,8 +4,8 @@ ; this point. Notably, if it isn't is will be invalid and reference a ; deleted block (%bb.-1.if.end) -; CHECK-NOT: savePoint: -; CHECK-NOT: restorePoint: +; CHECK: savePoint: '' +; CHECK: restorePoint: '' target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7" diff --git a/test/CodeGen/Generic/llc-start-stop.ll b/test/CodeGen/Generic/llc-start-stop.ll index 7508f94c50a9..49407fbb2d88 100644 --- a/test/CodeGen/Generic/llc-start-stop.ll +++ b/test/CodeGen/Generic/llc-start-stop.ll @@ -10,12 +10,12 @@ ; STOP-BEFORE-NOT: Loop Strength Reduction ; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-AFTER -; START-AFTER: -machine-branch-prob -pre-isel-intrinsic-lowering +; START-AFTER: -machine-branch-prob -gc-lowering ; START-AFTER: FunctionPass Manager ; START-AFTER-NEXT: Lower Garbage Collection Instructions ; RUN: llc < %s -debug-pass=Structure -start-before=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-BEFORE -; START-BEFORE: -machine-branch-prob -pre-isel-intrinsic-lowering +; START-BEFORE: -machine-branch-prob -domtree ; START-BEFORE: FunctionPass Manager ; START-BEFORE: Loop Strength Reduction ; START-BEFORE-NEXT: Lower Garbage Collection Instructions diff --git a/test/CodeGen/Hexagon/common-gep-inbounds.ll b/test/CodeGen/Hexagon/common-gep-inbounds.ll new file mode 100644 index 000000000000..a8b75725a0b8 --- /dev/null +++ b/test/CodeGen/Hexagon/common-gep-inbounds.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hexagon -debug-only=commgep 2>&1 < %s | FileCheck %s +; REQUIRES: asserts + +; We should generate new GEPs with "inbounds" flag. +; CHECK: new GEP:{{.*}}inbounds +; CHECK: new GEP:{{.*}}inbounds + +target triple = "hexagon" + +%struct.0 = type { i16, i16 } + +; Function Attrs: nounwind +define i16 @TraceBack() #0 { +entry: + %p = getelementptr inbounds %struct.0, %struct.0* undef, i32 0, i32 0 + %a = load i16, i16* %p + ret i16 %a +} + +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" } diff --git a/test/CodeGen/Hexagon/mux-undef.ll b/test/CodeGen/Hexagon/mux-undef.ll new file mode 100644 index 000000000000..3780a329b1eb --- /dev/null +++ b/test/CodeGen/Hexagon/mux-undef.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s +; +; Make sure this test compiles successfully. +; CHECK: jumpr r31 + +target triple = "hexagon--elf" + +; Function Attrs: nounwind +define i32 @fred() #0 { +b0: + call void @foo() #0 + br label %b1 + +b1: ; preds = %b0 + br i1 undef, label %b2, label %b3 + +b2: ; preds = %b1 + br label %b3 + +b3: ; preds = %b2, %b1 + %v4 = phi i32 [ 1, %b1 ], [ 2, %b2 ] + ret i32 %v4 +} + +declare void @foo() #0 + +attributes #0 = { nounwind "target-cpu"="hexagonv60" } diff --git a/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir b/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir index d63c2ef6e871..af785bcb10a9 100644 --- a/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir +++ b/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir @@ -17,6 +17,5 @@ body: | liveins: %w0 ; ERR: generic virtual registers must have a type ; ERR-NEXT: %0 - ; ERR: Unable to initialize machine function %0 = G_ADD i32 %w0, %w0 ... diff --git a/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir b/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir index e331179773d6..f177b91da559 100644 --- a/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir +++ b/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir @@ -18,6 +18,5 @@ body: | liveins: %w0 ; ERR: generic virtual registers must have a type ; ERR-NEXT: %0 - ; ERR: Unable to initialize machine function %0 = G_ADD i32 %w0, %w0 ... diff --git a/test/CodeGen/MIR/AArch64/register-operand-bank.mir b/test/CodeGen/MIR/AArch64/register-operand-bank.mir index d48495167f15..d2f99933a35a 100644 --- a/test/CodeGen/MIR/AArch64/register-operand-bank.mir +++ b/test/CodeGen/MIR/AArch64/register-operand-bank.mir @@ -7,8 +7,8 @@ --- # CHECK-LABEL: name: func # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } name: func body: | bb.0: diff --git a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir index fc0c4ce8c07f..cfb3aef5fb0f 100644 --- a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir +++ b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir @@ -25,7 +25,9 @@ frameInfo: maxAlignment: 8 # CHECK-LABEL: stack_local # CHECK: stack: -# CHECK-NEXT: { id: 0, name: local_var, offset: 0, size: 8, alignment: 8, local-offset: -8 } +# CHECK-NEXT: { id: 0, name: local_var, type: default, offset: 0, size: 8, alignment: 8, +# CHECK-NEXT: callee-saved-register: '', local-offset: -8, di-variable: '', di-expression: '', +# CHECK-NEXT: di-location: '' } stack: - { id: 0,name: local_var,offset: 0,size: 8,alignment: 8, local-offset: -8 } body: | diff --git a/test/CodeGen/MIR/Generic/frame-info.mir b/test/CodeGen/MIR/Generic/frame-info.mir index 157eb99e149e..a467bfa3a1af 100644 --- a/test/CodeGen/MIR/Generic/frame-info.mir +++ b/test/CodeGen/MIR/Generic/frame-info.mir @@ -36,9 +36,13 @@ tracksRegLiveness: true # CHECK-NEXT: maxAlignment: # CHECK-NEXT: adjustsStack: false # CHECK-NEXT: hasCalls: false +# CHECK-NEXT: stackProtector: '' +# CHECK-NEXT: maxCallFrameSize: # CHECK-NEXT: hasOpaqueSPAdjustment: false # CHECK-NEXT: hasVAStart: false # CHECK-NEXT: hasMustTailInVarArgFunc: false +# CHECK-NEXT: savePoint: '' +# CHECK-NEXT: restorePoint: '' # CHECK: body frameInfo: maxAlignment: 4 @@ -61,6 +65,7 @@ tracksRegLiveness: true # CHECK-NEXT: maxAlignment: # CHECK-NEXT: adjustsStack: true # CHECK-NEXT: hasCalls: true +# CHECK-NEXT: stackProtector: '' # CHECK-NEXT: maxCallFrameSize: 4 # CHECK-NEXT: hasOpaqueSPAdjustment: true # CHECK-NEXT: hasVAStart: true diff --git a/test/CodeGen/MIR/Generic/function-missing-machine-function.mir b/test/CodeGen/MIR/Generic/function-missing-machine-function.mir deleted file mode 100644 index f3a834801671..000000000000 --- a/test/CodeGen/MIR/Generic/function-missing-machine-function.mir +++ /dev/null @@ -1,13 +0,0 @@ -# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s -# This test verifies that an error is reported when a MIR file has some -# function but is missing a corresponding machine function. - -# CHECK: no machine function information for function 'foo' in the MIR file - ---- | - - define i32 @foo() { - ret i32 0 - } - -... diff --git a/test/CodeGen/MIR/X86/callee-saved-info.mir b/test/CodeGen/MIR/X86/callee-saved-info.mir index 883f6fdb0d22..6920611019b9 100644 --- a/test/CodeGen/MIR/X86/callee-saved-info.mir +++ b/test/CodeGen/MIR/X86/callee-saved-info.mir @@ -50,12 +50,12 @@ frameInfo: adjustsStack: true hasCalls: true # CHECK: fixedStack: -# CHECK-NEXT: , callee-saved-register: '%rbx' } +# CHECK: , callee-saved-register: '%rbx' } fixedStack: - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%rbx' } # CHECK: stack: # CHECK-NEXT: - { id: 0 -# CHECK-NEXT: , callee-saved-register: '%edi' } +# CHECK: callee-saved-register: '%edi' stack: - { id: 0, name: b, offset: -20, size: 4, alignment: 4 } - { id: 1, offset: -24, size: 4, alignment: 4, callee-saved-register: '%edi' } diff --git a/test/CodeGen/MIR/X86/empty0.mir b/test/CodeGen/MIR/X86/empty0.mir new file mode 100644 index 000000000000..4431af7c6a99 --- /dev/null +++ b/test/CodeGen/MIR/X86/empty0.mir @@ -0,0 +1,6 @@ +# RUN: llc -run-pass none -o - %s | FileCheck %s +# Make sure empty files don't crash us +# CHECK: --- | +# ... moduleid, sourcefilename stuff here .. +# CHECK: target datalayout = +# CHECK: ... diff --git a/test/CodeGen/MIR/X86/empty1.mir b/test/CodeGen/MIR/X86/empty1.mir new file mode 100644 index 000000000000..d80b0cd30231 --- /dev/null +++ b/test/CodeGen/MIR/X86/empty1.mir @@ -0,0 +1,8 @@ +# RUN: llc -run-pass none -o - %s | FileCheck %s +# Make sure empty files don't crash us +--- | +... +# CHECK: --- | +# ... moduleid, sourcefilename stuff here .. +# CHECK: target datalayout = +# CHECK: ... diff --git a/test/CodeGen/MIR/X86/empty2.mir b/test/CodeGen/MIR/X86/empty2.mir new file mode 100644 index 000000000000..7495807cd4d6 --- /dev/null +++ b/test/CodeGen/MIR/X86/empty2.mir @@ -0,0 +1,8 @@ +# RUN: llc -run-pass none -o - %s | FileCheck %s +# Make sure empty files don't crash us +--- +... +# CHECK: --- | +# ... moduleid, sourcefilename stuff here .. +# CHECK: target datalayout = +# CHECK: ... diff --git a/test/CodeGen/MIR/X86/fixed-stack-objects.mir b/test/CodeGen/MIR/X86/fixed-stack-objects.mir index a7ecac841a64..c87cb0b49f93 100644 --- a/test/CodeGen/MIR/X86/fixed-stack-objects.mir +++ b/test/CodeGen/MIR/X86/fixed-stack-objects.mir @@ -20,7 +20,7 @@ frameInfo: stackSize: 4 maxAlignment: 4 # CHECK: fixedStack: -# CHECK-NEXT: - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false } +# CHECK-NEXT: - { id: 0, type: default, offset: 0, size: 4, alignment: 4, isImmutable: true, fixedStack: - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false } stack: diff --git a/test/CodeGen/MIR/X86/generic-instr-type.mir b/test/CodeGen/MIR/X86/generic-instr-type.mir index b9e47cdf6192..78951de70a3c 100644 --- a/test/CodeGen/MIR/X86/generic-instr-type.mir +++ b/test/CodeGen/MIR/X86/generic-instr-type.mir @@ -19,11 +19,11 @@ --- name: test_vregs # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: _ } -# CHECK-NEXT: - { id: 1, class: _ } -# CHECK-NEXT: - { id: 2, class: _ } -# CHECK-NEXT: - { id: 3, class: _ } -# CHECK-NEXT: - { id: 4, class: _ } +# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/MIR/X86/inline-asm.mir b/test/CodeGen/MIR/X86/inline-asm.mir new file mode 100644 index 000000000000..be96517144b0 --- /dev/null +++ b/test/CodeGen/MIR/X86/inline-asm.mir @@ -0,0 +1,12 @@ +# RUN: llc -o - %s -mtriple=x86_64-- -run-pass none | FileCheck %s +--- +# Avoid crash/assert when using an emptystring in an INLINEASM. +# CHECK-LABEL: name: emptystring +# CHECK: bb.0: +# CHECK: INLINEASM $"", 1 +# CHECK: RET 0 +name: emptystring +body: | + bb.0: + INLINEASM $"", 1 + RET 0 diff --git a/test/CodeGen/MIR/X86/register-operand-class.mir b/test/CodeGen/MIR/X86/register-operand-class.mir index 63019daad7a1..abdcda2a077b 100644 --- a/test/CodeGen/MIR/X86/register-operand-class.mir +++ b/test/CodeGen/MIR/X86/register-operand-class.mir @@ -1,4 +1,4 @@ -# RUN: llc -o - %s -march=x86-64 -run-pass none | FileCheck %s +# RUN: llc -o - %s -march=x86-64 -run-pass none | FileCheck %s # Test various aspects of register class specification on machine operands. --- | define void @func() { ret void } @@ -6,11 +6,11 @@ --- # CHECK-LABEL: name: func # CHECK: registers: -# CHECK: - { id: 0, class: gr32 } -# CHECK: - { id: 1, class: gr64 } -# CHECK: - { id: 2, class: gr32 } -# CHECK: - { id: 3, class: gr16 } -# CHECK: - { id: 4, class: _ } +# CHECK: - { id: 0, class: gr32, preferred-register: '' } +# CHECK: - { id: 1, class: gr64, preferred-register: '' } +# CHECK: - { id: 2, class: gr32, preferred-register: '' } +# CHECK: - { id: 3, class: gr16, preferred-register: '' } +# CHECK: - { id: 4, class: _, preferred-register: '' } name: func body: | bb.0: diff --git a/test/CodeGen/MIR/X86/roundtrip.mir b/test/CodeGen/MIR/X86/roundtrip.mir new file mode 100644 index 000000000000..c697f7306041 --- /dev/null +++ b/test/CodeGen/MIR/X86/roundtrip.mir @@ -0,0 +1,20 @@ +# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=none | llc -o - -x mir - -mtriple=x86_64-- -run-pass=none | FileCheck %s +--- +# CHECK-LABEL: name: func0 +# CHECK: registers: +# CHECK: - { id: 0, class: gr32, preferred-register: '' } +# CHECK: - { id: 1, class: gr32, preferred-register: '' } +# CHECK: body: | +# CHECK: bb.0: +# CHECK: %0 = MOV32r0 implicit-def %eflags +# CHECK: dead %1 = COPY %0 +# CHECK: MOV32mr undef %rcx, 1, _, 0, _, killed %0 :: (volatile store 4) +# CHECK: RETQ undef %eax +name: func0 +body: | + bb.0: + %0 : gr32 = MOV32r0 implicit-def %eflags + dead %1 : gr32 = COPY %0 + MOV32mr undef %rcx, 1, _, 0, _, killed %0 :: (volatile store 4) + RETQ undef %eax +... diff --git a/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir index 27ca266f7794..310fa6a1c53b 100644 --- a/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir +++ b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir @@ -15,7 +15,7 @@ name: test tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } # CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '%esi' } # CHECK-NEXT: - { id: 2, class: gr32, preferred-register: '%edi' } registers: diff --git a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir index 1771d6fafcae..d3c422362848 100644 --- a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir +++ b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir @@ -19,7 +19,7 @@ name: test frameInfo: maxAlignment: 4 # CHECK: fixedStack: -# CHECK-NEXT: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 } +# CHECK-NEXT: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4, callee-saved-register: '' } fixedStack: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 } stack: diff --git a/test/CodeGen/MIR/X86/stack-object-debug-info.mir b/test/CodeGen/MIR/X86/stack-object-debug-info.mir index a893b0836a62..445d1bd3f1fd 100644 --- a/test/CodeGen/MIR/X86/stack-object-debug-info.mir +++ b/test/CodeGen/MIR/X86/stack-object-debug-info.mir @@ -51,8 +51,9 @@ frameInfo: maxAlignment: 16 # CHECK-LABEL: foo # CHECK: stack: -# CHECK: - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4', -# CHECK-NEXT: di-expression: '!10', di-location: '!11' } +# CHECK: - { id: 0, name: y.i, type: default, offset: 0, size: 256, alignment: 16, +# CHECK-NEXT: callee-saved-register: '', di-variable: '!4', di-expression: '!10', +# CHECK-NEXT: di-location: '!11' } stack: - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4', di-expression: '!7', di-location: '!8' } diff --git a/test/CodeGen/MIR/X86/stack-objects.mir b/test/CodeGen/MIR/X86/stack-objects.mir index 08b9ec0b4347..608202ec5dcc 100644 --- a/test/CodeGen/MIR/X86/stack-objects.mir +++ b/test/CodeGen/MIR/X86/stack-objects.mir @@ -21,9 +21,12 @@ name: test frameInfo: maxAlignment: 8 # CHECK: stack: -# CHECK-NEXT: - { id: 0, name: b, offset: -12, size: 4, alignment: 4 } -# CHECK-NEXT: - { id: 1, name: x, offset: -24, size: 8, alignment: 8 } -# CHECK-NEXT: - { id: 2, type: spill-slot, offset: -32, size: 4, alignment: 4 } +# CHECK-NEXT: - { id: 0, name: b, type: default, offset: -12, size: 4, alignment: 4, +# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +# CHECK-NEXT: - { id: 1, name: x, type: default, offset: -24, size: 8, alignment: 8, +# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +# CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, +# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } stack: - { id: 0, name: b, offset: -12, size: 4, alignment: 4 } - { id: 1, name: x, offset: -24, size: 8, alignment: 8 } diff --git a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir index 5e7d99352e57..95efd977d9c6 100644 --- a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir +++ b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir @@ -24,9 +24,11 @@ frameInfo: maxAlignment: 8 adjustsStack: true # CHECK: stack: -# CHECK-NEXT: - { id: 0, offset: -20, size: 4, alignment: 4 } -# CHECK-NEXT: - { id: 1, offset: -32, size: 8, alignment: 8 } -# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1 } +# CHECK-NEXT: - { id: 0, name: '', type: default, offset: -20, size: 4, alignment: 4, +# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +# CHECK-NEXT: - { id: 1, name: '', type: default, offset: -32, size: 8, alignment: 8, +# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1, stack: - { id: 0, offset: -20, size: 4, alignment: 4 } - { id: 1, offset: -32, size: 8, alignment: 8 } diff --git a/test/CodeGen/MIR/X86/virtual-registers.mir b/test/CodeGen/MIR/X86/virtual-registers.mir index e63bcf4acdd1..0d181f895aa9 100644 --- a/test/CodeGen/MIR/X86/virtual-registers.mir +++ b/test/CodeGen/MIR/X86/virtual-registers.mir @@ -33,9 +33,9 @@ name: bar tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gr32 } - { id: 1, class: gr32 } @@ -67,9 +67,9 @@ name: foo tracksRegLiveness: true # CHECK: name: foo # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 2, class: gr32 } - { id: 0, class: gr32 } diff --git a/test/CodeGen/Mips/biggot.ll b/test/CodeGen/Mips/biggot.ll index 3acfa372a905..b266b5e05e21 100644 --- a/test/CodeGen/Mips/biggot.ll +++ b/test/CodeGen/Mips/biggot.ll @@ -1,6 +1,9 @@ ; RUN: llc -march=mipsel -mxgot -relocation-model=pic < %s | FileCheck %s -check-prefix=O32 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mxgot -relocation-model=pic < %s | \ ; RUN: FileCheck %s -check-prefix=N64 +; RUN: llc -march=mipsel -mxgot -relocation-model=pic -fast-isel < %s | FileCheck %s -check-prefix=O32 +; RUN: llc -march=mips64el -mcpu=mips64r2 -mxgot -relocation-model=pic -fast-isel < %s | \ +; RUN: FileCheck %s -check-prefix=N64 @v0 = external global i32 diff --git a/test/CodeGen/Mips/cconv/vector.ll b/test/CodeGen/Mips/cconv/vector.ll new file mode 100644 index 000000000000..5a88d064fe73 --- /dev/null +++ b/test/CodeGen/Mips/cconv/vector.ll @@ -0,0 +1,1657 @@ +; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB +; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB +; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB +; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5 +; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL +; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL +; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL +; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5 + + + +; Test that vector types are passed through the integer register set whether or +; not MSA is enabled. This is a ABI requirement for MIPS. For GCC compatibility +; we need to handle any power of 2 number of elements. We will test this +; exhaustively for combinations up to MSA register (128 bits) size. + +; First set of tests are for argument passing. + +define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) { +; ALL-LABEL: i8_2: +; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16 + +; MIPS32EL: addu $1, $4, $5 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 + +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48 + +; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <2 x i8> %a, %b + ret <2 x i8> %1 +} + +; Test that vector spilled to the outgoing argument area have the expected +; offset from $sp. + +define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, + <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) { +entry: + +; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24 +; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24 + +; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280 +; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280 +; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280 +; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280 + +; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp) +; MIPS32-DAG; lbu ${{[0-9]+}}, 17($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp) + +; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp) + +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp) +; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp) + +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48 +; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48 + +; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp) +; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp) + + %0 = add <2 x i8> %a, %b + %1 = add <2 x i8> %0, %c + %2 = add <2 x i8> %1, %d + %3 = add <2 x i8> %2, %e + %4 = add <2 x i8> %3, %f + %5 = add <2 x i8> %4, %g + ret <2 x i8> %5 +} + +define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) { +; ALL-LABEL: i8_4: +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 + +; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0 + + %1 = add <4 x i8> %a, %b + ret <4 x i8> %1 +} + +define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { +; ALL-LABEL: i8_8: +; MIPS32-NOT: lw +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8 +; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <8 x i8> %a, %b + ret <8 x i8> %1 +} + +define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) { +; ALL-LABEL: i8_16: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = add <16 x i8> %a, %b + + ret <16 x i8> %1 +} + +define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) { +; ALL-LABEL: i16_2: +; MIPS32: addu $[[R0:[0-9]+]], $4, $5 +; MIPS32: andi $[[R1:[0-9]+]], $[[R0]], 65535 +; MIPS32: srl $[[R2:[0-9]+]], $5, 16 +; MIPS32: srl $[[R3:[0-9]+]], $4, 16 +; MIPS32: addu $[[R4:[0-9]+]], $[[R3]], $[[R2]] +; MIPS32: sll $2, $[[R4]], 16 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 + +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 + +; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0 + + %1 = add <2 x i16> %a, %b + ret <2 x i16> %1 +} + +define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) { +; ALL-LABEL: i16_4: +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <4 x i16> %a, %b + ret <4 x i16> %1 +} + +define <8 x i16> @i16_8(<8 x i16> %a, <8 x i16> %b) { +; ALL-LABEL: i16_8: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16 +; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = add <8 x i16> %a, %b + ret <8 x i16> %1 +} + +define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) { +; ALL-LABEL: i32_2: +; MIPS32-DAG: addu $2, $4, $6 +; MIPS32-DAG: addu $3, $5, $7 + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = add <2 x i32> %a, %b + + ret <2 x i32> %1 +} + +define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) { +; ALL-LABEL: i32_4: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: addu $2 +; MIPS32-DAG: addu $3 +; MIPS32-DAG: addu $4 +; MIPS32-DAG: addu $5 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0 +; MIPS64-DAG: sll ${{[0-9]+}}, $7, 0 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32 + %1 = add <4 x i32> %a, %b + ret <4 x i32> %1 +} + +define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) { +; ALL-LABEL: i64_2: +; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: addu $2 +; MIPS32-DAG: addu $3 +; MIPS32-DAG: addu $4 +; MIPS32-DAG: addu $5 + +; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $7 + +; MIPS64-DAG: daddu $2, $4, $6 +; MIPS64-DAG: daddu $3, $5, $7 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = add <2 x i64> %a, %b + ret <2 x i64> %1 +} + +; The MIPS vector ABI treats vectors of floats differently to vectors of +; integers. + +; For arguments floating pointer vectors are bitcasted to integer vectors whose +; elements are of GPR width and where the element count is deduced from +; the length of the floating point vector divided by the size of the GPRs. + +; For returns, integer vectors are passed via the GPR register set, but +; floating point vectors are returned via a hidden sret pointer. + +; For testing purposes we skip returning values here and test them below +; instead. +@float_res_v2f32 = external global <2 x float> + +define void @float_2(<2 x float> %a, <2 x float> %b) { +; ALL-LABEL: float_2: +; MIPS32: mtc1 $7, $f[[F0:[0-9]+]] +; MIPS32: mtc1 $5, $f[[F1:[0-9]+]] +; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] +; MIPS32: swc1 $f[[F2]] +; MIPS32: mtc1 $6, $f[[F3:[0-9]+]] +; MIPS32: mtc1 $4, $f[[F4:[0-9]+]] +; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]] +; MIPS32: swc1 $f[[F5]] + +; MIPS32R5-DAG: sw $4 +; MIPS32R5-DAG: sw $5 +; MIPS32R5-DAG: sw $6 +; MIPS32R5-DAG: sw $7 + +; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0 +; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32 +; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0 +; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0 +; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}} + +; MIPS64R5-DAG: sd $4 +; MIPS64R5-DAG: sd $5 + + %1 = fadd <2 x float> %a, %b + store <2 x float> %1, <2 x float> * @float_res_v2f32 + ret void +} + +@float_res_v4f32 = external global <4 x float> + +; For MSA this case is suboptimal, the 4 loads can be combined into a single +; ld.w. + +define void @float_4(<4 x float> %a, <4 x float> %b) { +; ALL-LABEL: float_4: +; MIPS32-DAG: mtc1 $4 +; MIPS32-DAG: mtc1 $5 +; MIPS32-DAG: mtc1 $6 +; MIPS32-DAG: mtc1 $7 +; MIPS32-DAG: lwc1 +; MIPS32-DAG: lwc1 +; MIPS32-DAG: lwc1 +; MIPS32-DAG: lwc1 + +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]] +; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]] +; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]] +; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]] + +; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W1]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W1]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W1]][3], $7 + +; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0 +; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32 +; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0 +; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0 +; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}} +; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0 +; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0 +; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32 +; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32 +; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0 +; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0 +; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}} + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = fadd <4 x float> %a, %b + store <4 x float> %1, <4 x float> * @float_res_v4f32 + ret void +} + +@double_v2f64 = external global <2 x double> + +define void @double_2(<2 x double> %a, <2 x double> %b) { +; ALL-LABEL: double_2: +; MIPS32-DAG: sw $7 +; MIPS32-DAG: sw $6 +; MIPS32-DAG: ldc1 +; MIPS32-DAG: ldc1 +; MIPS32: add.d +; MIPS32-DAG: sw $5 +; MIPS32-DAG: sw $4 +; MIPS32-DAG: ldc1 +; MIPS32-DAG: ldc1 +; MIPS32: add.d + +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp) +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]] +; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]] +; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]] +; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]] + +; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4 +; MIPS32R5-DAG: insert.w $w[[W1]][1], $5 +; MIPS32R5-DAG: insert.w $w[[W1]][2], $6 +; MIPS32R5-DAG: insert.w $w[[W1]][3], $7 + +; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]] +; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]] +; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]] +; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]] +; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]] +; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]] + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4 +; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5 +; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6 +; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7 + + %1 = fadd <2 x double> %a, %b + store <2 x double> %1, <2 x double> * @double_v2f64 + ret void +} + +; Return value testing. +; Integer vectors are returned in $2, $3, $4, $5 for O32, $2, $3 for N32/N64 +; Floating point vectors are returned through a hidden sret pointer. + +@gv2i8 = global <2 x i8> +@gv4i8 = global <4 x i8> +@gv8i8 = global <8 x i8> +@gv16i8 = global <16 x i8> + +@gv2i16 = global <2 x i16> +@gv4i16 = global <4 x i16> +@gv8i16 = global <8 x i16> + +@gv2i32 = global <2 x i32> +@gv4i32 = global <4 x i32> + +@gv2i64 = global <2 x i64> + +define <2 x i8> @ret_2_i8() { +; ALL-LABEL: ret_2_i8: +; MIPS32-DAG: lhu $2 +; MIPS32R5-DAG: lhu $2 + +; FIXME: why is this lh instead of lhu on mips64? + +; MIPS64-DAG: lh $2 +; MIPS64-DAG: lh $2 + %1 = load <2 x i8>, <2 x i8> * @gv2i8 + ret <2 x i8> %1 +} + +define <4 x i8> @ret_4_i8() { +; ALL-LABEL: ret_4_i8: +; MIPS32-DAG: lw $2 +; MIPS32R5-DAG: lw $2 + +; MIPS64-DAG: lw $2 +; MIPS64R5-DAG: lw $2 + + %1 = load <4 x i8>, <4 x i8> * @gv4i8 + ret <4 x i8> %1 +} + +define <8 x i8> @ret_8_i8() { +; ALL-LABEL: ret_8_i8: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 + +; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]] +; MIPS32R5: copy_s.w $3, $w[[W0]] + +; MIPS64-DAG: ld $2 +; MIPS64R5-DAG: ld $2 + %1 = load <8 x i8>, <8 x i8> * @gv8i8 + ret <8 x i8> %1 +} + +define <16 x i8> @ret_16_i8() { +; ALL-LABEL: ret_16_i8: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2 +; MIPS64R5-DAG: copy_s.d $3 + + %1 = load <16 x i8>, <16 x i8> * @gv16i8 + ret <16 x i8> %1 +} + +define <2 x i16> @ret_2_i16() { +; ALL-LABEL: ret_2_i16: +; MIPS32-DAG: lw $2 + +; MIPS32R5-DAG: lw $2 + +; MIPS64-DAG: lw $2 + +; MIPS64R5-DAG: lw $2 + %1 = load <2 x i16>, <2 x i16> * @gv2i16 + ret <2 x i16> %1 +} + +define <4 x i16> @ret_4_i16() { +; ALL-LABEL: ret_4_i16: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]] + +; MIPS64-DAG: ld $2 +; MIPS64R5-DAG: ld $2 + %1 = load <4 x i16>, <4 x i16> * @gv4i16 + ret <4 x i16> %1 +} + +define <8 x i16> @ret_8_i16() { +; ALL-LABEL: ret_8_i16: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2 +; MIPS64R5-DAG: copy_s.d $3 + + %1 = load <8 x i16>, <8 x i16> * @gv8i16 + ret <8 x i16> %1 +} + +define <2 x i32> @ret_2_i32() { +; ALL-LABEL: ret_2_i32: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]] + +; MIPS64-DAG: ld $2 +; MIPS64R5-DAG: ld $2 + + %1 = load <2 x i32>, <2 x i32> * @gv2i32 + ret <2 x i32> %1 +} + +define <4 x i32> @ret_4_i32() { +; ALL-LABEL: ret_4_i32: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]] +; MIPS64R5-DAG: copy_s.d $3, $w[[W0]] + + %1 = load <4 x i32>, <4 x i32> * @gv4i32 + ret <4 x i32> %1 +} + +define <2 x i64> @ret_2_i64() { +; ALL-LABEL: ret_2_i64: +; MIPS32-DAG: lw $2 +; MIPS32-DAG: lw $3 +; MIPS32-DAG: lw $4 +; MIPS32-DAG: lw $5 + +; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0] +; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1] +; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2] +; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3] + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]] +; MIPS64R5-DAG: copy_s.d $3, $w[[W0]] + + %1 = load <2 x i64>, <2 x i64> * @gv2i64 + ret <2 x i64> %1 +} + +@gv2f32 = global <2 x float> +@gv4f32 = global <4 x float> + +define <2 x float> @ret_float_2() { +entry: +; ALL-LABEL: ret_float_2: + +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4) + +; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4) + +; MIPS64: ld $2 + +; MIPS64R5: ld $2 + + %0 = load <2 x float>, <2 x float> * @gv2f32 + ret <2 x float> %0 +} + +define <4 x float> @ret_float_4() { +entry: +; ALL-LABEL: ret_float_4: + +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4) + +; MIPS32R5: st.w $w{{[0-9]+}}, 0($4) + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $3 + +; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0] +; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1] + + %0 = load <4 x float>, <4 x float> * @gv4f32 + ret <4 x float> %0 +} + +@gv2f64 = global <2 x double> + +define <2 x double> @ret_double_2() { +entry: +; ALL-LABEL: ret_double_2: + +; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4) +; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4) + +; MIPS32R5: st.d $w{{[0-9]+}}, 0($4) + +; MIPS64-DAG: ld $2 +; MIPS64-DAG: ld $2 + +; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0] +; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1] + + %0 = load <2 x double>, <2 x double> * @gv2f64 + ret <2 x double> %0 +} + +; Test argument lowering and call result lowering. + +define void @call_i8_2() { +entry: +; ALL-LABEL: call_i8_2: +; MIPS32EB-DAG: addiu $4 +; MIPS32EB-DAG: addiu $5 +; MIPS32-NOT: addiu $6 +; MIPS32-NOT: addiu $7 + +; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp) +; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp) + +; MIPS32R5: jal +; MIPS32R5: sw $2, {{[0-9]+}}($sp) + +; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}}) +; MIPS32R5-DAG; sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}}) + +; MIPS64EB: daddiu $4, $zero, 1543 +; MIPS64EB: daddiu $5, $zero, 3080 + +; MIPS64EL: daddiu $4, $zero, 1798 +; MIPS64EL; daddiu $5, $zero, 2060 + +; MIPS64R5-DAG: lh $4 +; MIPS64R5-DAG: lh $5 + +; MIPS32: jal i8_2 +; MIPS64: jalr $25 + +; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16 +; MIPS32EB-DAG: sb $[[R0]] +; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24 +; MIPS32EB-DAG: sb $[[R1]] + +; MIPS32EL: sb $2 +; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8 +; MIPS32EL: sb $[[R0]] + +; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48 +; MIPS64EB: sb $[[R4]] +; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56 +; MIPS64EB: sb $[[R5]] + +; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0 +; MIPS64EL: sb $[[R6]] +; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8 +; MIPS64EL: sb $[[R7]] + +; MIPS64R5: sd $2 + + %0 = call <2 x i8> @i8_2(<2 x i8> , <2 x i8> ) + store <2 x i8> %0, <2 x i8> * @gv2i8 + ret void +} + +define void @call_i8_4() { +entry: +; ALL-LABEL: call_i8_4: +; MIPS32: ori $4 +; MIPS32: ori $5 +; MIPS32-NOT: ori $6 +; MIPS32-NOT: ori $7 + +; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp) +; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp) + +; MIPS64: ori $4 +; MIPS64: ori $5 + +; MIPS64R5: lw $4 +; MIPS64R5: lw $5 + +; MIPS32: jal i8_4 +; MIPS64: jalr $25 + +; MIPS32: sw $2 + +; MIPS32R5-DAG: sw $2 + +; MIPS64: sw $2 +; MIPS64R5: sw $2 + + %0 = call <4 x i8> @i8_4(<4 x i8> , <4 x i8> ) + store <4 x i8> %0, <4 x i8> * @gv4i8 + ret void +} + +define void @call_i8_8() { +entry: +; ALL-LABEL: call_i8_8: + +; MIPS32: ori $6 +; MIPS32: ori $4 +; MIPS32: move $5 +; MIPS32: move $7 + +; MIPS32R5-DAG: ori $6 +; MIPS32R5-DAG: ori $4 +; MIPS32R5-DAG: move $5 +; MIPS32R5-DAG: move $7 + +; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314 +; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314 + +; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798 +; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060 + +; MIPS32: jal i8_8 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $2 +; MIPS32-DAG: sw $3 + +; MIPS32R5-DAG: sw $2 +; MIPS32R5-DAG: sw $3 + +; MIPS64: sd $2 +; MIPS64R5: sd $2 + + %0 = call <8 x i8> @i8_8(<8 x i8> , <8 x i8> ) + store <8 x i8> %0, <8 x i8> * @gv8i8 + ret void +} + +define void @calli8_16() { +entry: +; ALL-LABEL: calli8_16: +; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32: move $5, ${{[0-9]+}} +; MIPS32: move $6, ${{[0-9]+}} + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $5 +; MIPS64-DAG: daddiu $6 +; MIPS64-DAG: daddiu $7 + +; MIPS64R5-DAG: copy_s.d $4 +; MIPS64R5-DAG: copy_s.d $5 +; MIPS64R5-DAG: copy_s.d $6 +; MIPS64R5-DAG: copy_s.d $7 + +; MIPS32: jal i8_16 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64-DAG: sd $3 +; MIPS64-DAG: sd $2 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <16 x i8> @i8_16(<16 x i8> , <16 x i8> ) + store <16 x i8> %0, <16 x i8> * @gv16i8 + ret void +} + +define void @calli16_2() { +entry: +; ALL-LABEL: calli16_2: + +; MIPS32-DAG: ori $4 +; MIPS32-DAG: ori $5 + +; MIPS32R5-DAG: lw $4 +; MIPS32R5-DAG: lw $5 + +; MIPS64: ori $4 +; MIPS64: ori $5 + +; MIPS64R5-DAG: lw $4 +; MIPS64R5-DAG: lw $5 + +; MIPS32: jal i16_2 +; MIPS64: jalr $25 + +; MIPS32: sw $2, %lo(gv2i16) + +; MIPS32R5: sw $2, %lo(gv2i16) + +; MIPS64: sw $2 + +; MIPS64R6: sw $2 + + %0 = call <2 x i16> @i16_2(<2 x i16> , <2 x i16> ) + store <2 x i16> %0, <2 x i16> * @gv2i16 + ret void +} + +define void @calli16_4() { +entry: +; ALL-LABEL: calli16_4: +; MIPS32-DAG: ori $4 +; MIPS32-DAG: ori $5 +; MIPS32-DAG: ori $6 +; MIPS32-DAG: move $7 + +; MIPS32R5-DAG: ori $4 +; MIPS32R5-DAG: ori $5 +; MIPS32R5-DAG: ori $6 +; MIPS32R5-DAG: move $7 + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $5 + +; MIPS64R5-DAG: ld $4 +; MIPS64R5-DAG: ld $5 + +; MIPS32: jal i16_4 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}}) + +; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}}) + +; MIPS64: sd $2 +; MIPS64R5: sd $2 + + %0 = call <4 x i16> @i16_4(<4 x i16> , <4 x i16> ) + store <4 x i16> %0, <4 x i16> * @gv4i16 + ret void +} + +define void @calli16_8() { +entry: +; ALL-LABEL: calli16_8: + +; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}} +; MIPS32-DAG: move $6, ${{[0-9]+}} +; MIPS32-DAG: move $7, ${{[0-9]+}} + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $7 +; MIPS64-DAG: move $5 +; MIPS64-DAG: move $6 + +; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1] +; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1] + +; MIPS32: jal i16_8 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64: sd $3 +; MIPS64: sd $2 + +; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W2]][1], $3 + + %0 = call <8 x i16> @i16_8(<8 x i16> , <8 x i16> ) + store <8 x i16> %0, <8 x i16> * @gv8i16 + ret void +} + +define void @calli32_2() { +entry: +; ALL-LABEL: calli32_2: + +; MIPS32-DAG: addiu $4 +; MIPS32-DAG: addiu $5 +; MIPS32-DAG: addiu $6 +; MIPS32-DAG: addiu $7 + +; MIPS32R5-DAG: addiu $4 +; MIPS32R5-DAG: addiu $5 +; MIPS32R5-DAG: addiu $6 +; MIPS32R5-DAG: addiu $7 + +; MIPS64: daddiu $4 +; MIPS64: daddiu $5 + +; MIPS64R5-DAG: ld $4 +; MIPS64R5-DAG: ld $5 + +; MIPS32: jal i32_2 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) + +; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}}) +; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}}) + +; MIPS64: sd $2 + +; MIPS64R5: sd $2 + + %0 = call <2 x i32> @i32_2(<2 x i32> , <2 x i32> ) + store <2 x i32> %0, <2 x i32> * @gv2i32 + ret void +} + +define void @calli32_4() { +entry: +; ALL-LABEL: calli32_4: + +; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32-DAG: addiu $4 +; MIPS32-DAG: addiu $5 +; MIPS32-DAG: addiu $6 +; MIPS32-DAG: addiu $7 + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS32R5-DAG: addiu $4 +; MIPS32R5-DAG: addiu $5 +; MIPS32R5-DAG: addiu $6 +; MIPS32R5-DAG: addiu $7 + +; MIPS64-DAG: daddiu $4 +; MIPS64-DAG: daddiu $6 +; MIPS64-DAG: daddiu $5 +; MIPS64-DAG: move $7 + +; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1] +; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0] +; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1] + +; MIPS32: jal i32_4 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64-DAG: sd $2 +; MIPS64-DAG: sd $3 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R6-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <4 x i32> @i32_4(<4 x i32> , <4 x i32> ) + store <4 x i32> %0, <4 x i32> * @gv4i32 + ret void +} + +define void @calli64_2() { +entry: +; ALL-LABEL: calli64_2: + +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp) + +; MIPS32-DAG: addiu $4 +; MIPS32-DAG: addiu $5 +; MIPS32-DAG: addiu $6 +; MIPS32-DAG: addiu $7 + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64: daddiu $4 +; MIPS64: daddiu $5 +; MIPS64: daddiu $6 +; MIPS64: daddiu $7 + +; MIPS64R5: daddiu $4 +; MIPS64R5: daddiu $5 +; MIPS64R5: daddiu $6 +; MIPS64R5: daddiu $7 + +; MIPS32: jal i64_2 +; MIPS64: jalr $25 + +; MIPS32-DAG: sw $5, 12(${{[0-9]+}}) +; MIPS32-DAG: sw $4, 8(${{[0-9]+}}) +; MIPS32-DAG: sw $3, 4(${{[0-9]+}}) +; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}}) + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $3 +; MIPS32R5-DAG: insert.w $w[[W0]][2], $4 +; MIPS32R5-DAG: insert.w $w[[W0]][3], $5 +; MIPS32R5-DAG: st.w $w[[W0]] + +; MIPS64-DAG: sd $3 +; MIPS64-DAG: sd $2 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R6-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <2 x i64> @i64_2(<2 x i64> , <2 x i64> ) + store <2 x i64> %0, <2 x i64> * @gv2i64 + ret void +} + +declare <2 x float> @float2_extern(<2 x float>, <2 x float>) +declare <4 x float> @float4_extern(<4 x float>, <4 x float>) +declare <2 x double> @double2_extern(<2 x double>, <2 x double>) + +define void @callfloat_2() { +entry: +; ALL-LABEL: callfloat_2: + +; MIPS32-DAG: addiu $4, $sp, 24 +; MIPS32-DAG: addiu $6, $zero, 0 +; MIPS32-DAG: lui $7 + +; MIPS32R5-DAG: addiu $4, $sp, 24 +; MIPS32R5-DAG: addiu $6, $zero, 0 +; MIPS32R5-DAG: lui $7 + +; MIPS64: dsll $4 +; MIPS64: dsll $5 + +; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}} + +; MIPS32: jal float2_extern +; MIPS64: jalr $25 + +; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp) +; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp) + +; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}}) +; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}}) + +; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp) +; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp) + +; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}}) +; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}}) + +; MIPS64: sd $2 + +; MIPS64R5: sd $2 + + %0 = call <2 x float> @float2_extern(<2 x float> , <2 x float> ) + store <2 x float> %0, <2 x float> * @gv2f32 + ret void +} + +define void @callfloat_4() { +entry: +; ALL-LABEL: callfloat_4: + +; MIPS32: sw ${{[0-9]+}}, 36($sp) +; MIPS32: sw ${{[0-9]+}}, 32($sp) +; MIPS32: sw ${{[0-9]+}}, 28($sp) +; MIPS32: sw ${{[0-9]+}}, 24($sp) +; MIPS32: sw ${{[0-9]+}}, 20($sp) +; MIPS32: sw ${{[0-9]+}}, 16($sp) +; MIPS32: addiu $4, $sp, 48 +; MIPS32: addiu $6, $zero, 0 +; MIPS32: lui $7 + +; MIPS32R5: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5: copy_s.w $7, $w{{[0-9]+}} +; MIPS32R5: sw ${{[0-9]+}}, 36($sp) +; MIPS32R5: sw ${{[0-9]+}}, 32($sp) +; MIPS32R5: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5: sw ${{[0-9]+}}, 16($sp) +; MIPS32R5: addiu $4, $sp, 48 + +; MIPS64-DAG: dsll $4 +; MIPS64-DAG: dsll $5 +; MIPS64-DAG: dsll $6 +; MIPS64-DAG: dsll $7 + +; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}} + +; MIPS64: jalr $25 +; MIPS32: jal + +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp) + +; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp) + +; MIPS64-DAG: $2 +; MIPS64-DAG: $3 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <4 x float> @float4_extern(<4 x float> , <4 x float> ) + store <4 x float> %0, <4 x float> * @gv4f32 + ret void +} + +define void @calldouble_2() { +entry: +; ALL-LABEL: calldouble_2: + +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp) +; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp) + +; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]] +; MIPS32-DAG: addiu $6, $zero, 0 +; MIPS32-DAG: addiu $7, $zero, 0 + +; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}} +; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}} + +; MIPS32R5-DAG: sw ${{[0-9]+}}, 36($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 32($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp) +; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp) + +; MIPS64-DAG: dsll $5 +; MIPS64-DAG: dsll $6 +; MIPS64-DAG: dsll $7 +; MIPS64-DAG: daddiu $4 + +; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}} +; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}} + +; MIPS32: jal double2_extern +; MIPS64: jalr $25 + +; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp) +; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp) + +; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}}) +; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}}) + +; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp) +; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}}) + +; MIPS64-DAG: sd $2 +; MIPS64-DAG: sd $3 + +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2 +; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3 + + %0 = call <2 x double> @double2_extern(<2 x double> , <2 x double> ) + store <2 x double> %0, <2 x double> * @gv2f64 + ret void +} + +; The mixed tests show that due to alignment requirements, $5 is not used +; in argument passing. + +define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) { +entry: +; ALL-LABEL: mixed_i8: + +; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}} +; MIPS32: andi $[[R7:[0-9]+]], $6, 255 +; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]] +; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]] + +; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}} +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp) +; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}} + +; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255 +; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp) +; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp) +; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp) + +; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0 +; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255 +; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]] +; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]] + +; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32 +; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0 +; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0 +; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}} +; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32 +; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0 +; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32 +; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0 +; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0 +; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255 +; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]] +; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]] + +; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32 +; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0 +; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}} + +; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0 +; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0 +; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}} + +; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0 +; MIPS64R5: andi $[[R1:[0-9]+]], $[[R0]], 255 +; MIPS64R5: sd $4, {{[0-9]+}}($sp) +; MIPS64R5: sd $6, {{[0-9]+}}($sp) + + %0 = zext i8 %b to i32 + %1 = uitofp i32 %0 to float + %2 = insertelement <2 x float> undef, float %1, i32 0 + %3 = insertelement <2 x float> %2, float %1, i32 1 + %4 = fadd <2 x float> %3, %a + %5 = fadd <2 x float> %4, %c + %6 = extractelement <2 x float> %5, i32 0 + %7 = extractelement <2 x float> %5, i32 1 + %8 = fadd float %6, %7 + ret float %8 +} + +define <4 x float> @mixed_32(<4 x float> %a, i32 %b) { +entry: +; ALL-LABEL: mixed_32: + +; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}} +; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}} +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4) +; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4) + +; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6 +; MIPS32R5: insert.w $w[[W0:[0-9]+]][1], $7 +; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32R5: insert.w $w[[W0:[0-9]+]][2], $[[R0]] +; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32R5: insert.w $w[[W0:[0-9]+]][3], $[[R1]] +; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp) + +; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0 +; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32 +; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0 +; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}} +; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0 +; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0 +; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}} +; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}} +; MIPS64-DAG: sll $[[R6:[0-9]+]], $5, 0 +; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}} + +; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4 +; MIPS64R5: insert.d $w[[W0]][1], $5 +; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0 +; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]] + + %0 = uitofp i32 %b to float + %1 = insertelement <4 x float> undef, float %0, i32 0 + %2 = insertelement <4 x float> %1, float %0, i32 1 + %3 = insertelement <4 x float> %2, float %0, i32 2 + %4 = insertelement <4 x float> %3, float %0, i32 3 + %5 = fadd <4 x float> %4, %a + ret <4 x float> %5 +} + + +; This test is slightly more fragile than I'd like as the offset into the +; outgoing arguments area is dependant on the size of the stack frame for +; this function. + +define <4 x float> @cast(<4 x i32> %a) { +entry: +; ALL-LABEL: cast: + +; MIPS32: addiu $sp, $sp, -32 +; MIPS32-DAG: sw $6, {{[0-9]+}}($sp) +; MIPS32-DAG: sw $7, {{[0-9]+}}($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp) +; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp) + +; MIPS32R5-DAG: insert.w $w0[0], $6 +; MIPS32R5-DAG: insert.w $w0[1], $7 +; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32R5-DAG: insert.w $w0[2], $[[R0]] +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w0[3], $[[R1]] + +; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32 +; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0 +; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32 + +; MIPS64R5-DAG: insert.d $w0[0], $4 +; MIPS64R5-DAG: insert.d $w0[1], $5 + + %0 = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %0 +} + +define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) { +entry: +; ALL-LABEL: select: + +; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1 +; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1 +; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1 +; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1 + +; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6 +; MIPS32R5-DAG: insert.w $w[[W0]][1], $7 +; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp) +; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp) +; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]] +; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]] +; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]] + +; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0 +; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32 +; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0 +; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0 +; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32 +; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0 +; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0 +; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32 +; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0 +; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0 +; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}} +; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32 +; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0 +; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}} + +; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1 +; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32 +; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1 + +; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1 +; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32 +; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0 +; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1 + +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4 +; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5 + + %cond.t = trunc <4 x i32> %cond to <4 x i1> + %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2 + ret <4 x float> %res +} diff --git a/test/CodeGen/Mips/ctlz-v.ll b/test/CodeGen/Mips/ctlz-v.ll index 3d580e5771f4..156c640681b7 100644 --- a/test/CodeGen/Mips/ctlz-v.ll +++ b/test/CodeGen/Mips/ctlz-v.ll @@ -8,10 +8,14 @@ entry: ; MIPS32: clz $2, $4 ; MIPS32: clz $3, $5 -; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0 -; MIPS64-DAG: clz $2, $[[A0]] -; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0 -; MIPS64-DAG: clz $3, $[[A1]] +; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32 +; MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0 +; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]] +; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32 +; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0 +; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]] +; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32 +; MIPS64-DAG: or $2, $[[R3]], $[[R1]] %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true) ret <2 x i32> %ret diff --git a/test/CodeGen/Mips/cttz-v.ll b/test/CodeGen/Mips/cttz-v.ll index 85f69f9a17d9..dbcde7f5fe5b 100644 --- a/test/CodeGen/Mips/cttz-v.ll +++ b/test/CodeGen/Mips/cttz-v.ll @@ -24,14 +24,17 @@ entry: ; MIPS64-DAG: and $[[R2:[0-9]+]], $[[R1]], $[[R0]] ; MIPS64-DAG: clz $[[R3:[0-9]+]], $[[R2]] ; MIPS64-DAG: addiu $[[R4:[0-9]+]], $zero, 32 -; MIPS64-DAG: subu $2, $[[R4]], $[[R3]] -; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0 -; MIPS64-DAG: addiu $[[R5:[0-9]+]], $[[A1]], -1 -; MIPS64-DAG: not $[[R6:[0-9]+]], $[[A1]] -; MIPS64-DAG: and $[[R7:[0-9]+]], $[[R6]], $[[R5]] -; MIPS64-DAG: clz $[[R8:[0-9]+]], $[[R7]] -; MIPS64-DAG: jr $ra -; MIPS64-DAG: subu $3, $[[R4]], $[[R8]] +; MIPS64-DAG: subu $[[R5:[0-9]+]], $[[R4]], $[[R3]] +; MIPS64-DAG: dsrl $[[R6:[0-9]+]], $4, 32 +; MIPS64-DAG: sll $[[R7:[0-9]+]], $[[R6]], 0 +; MIPS64-DAG: dext $[[R8:[0-9]+]], $[[R5]], 0, 32 +; MIPS64-DAG: addiu $[[R9:[0-9]+]], $[[R7]], -1 +; MIPS64-DAG: not $[[R10:[0-9]+]], $[[R7]] +; MIPS64-DAG: and $[[R11:[0-9]+]], $[[R10]], $[[R9]] +; MIPS64-DAG: clz $[[R12:[0-9]+]], $[[R11]] +; MIPS64-DAG: subu $[[R13:[0-9]+]], $[[R4]], $[[R12]] +; MIPS64-DAG: dsll $[[R14:[0-9]+]], $[[R13]], 32 +; MIPS64-DAG: or $2, $[[R8]], $[[R14]] %ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true) ret <2 x i32> %ret diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll index edd6258270a0..90eb14a75b42 100644 --- a/test/CodeGen/Mips/dsp-r1.ll +++ b/test/CodeGen/Mips/dsp-r1.ll @@ -1172,9 +1172,19 @@ entry: ret { i32 } %.fca.0.insert } +define { i32 } @test__builtin_mips_repl_ph2(i32 %i0) nounwind readnone { +entry: +; CHECK: repl.ph + + %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 -2) + %1 = bitcast <2 x i16> %0 to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + declare <2 x i16> @llvm.mips.repl.ph(i32) nounwind readnone -define { i32 } @test__builtin_mips_repl_ph2(i32 %i0, i32 %a0) nounwind readnone { +define { i32 } @test__builtin_mips_repl_ph3(i32 %i0, i32 %a0) nounwind readnone { entry: ; CHECK: replv.ph diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll index c155eedd62c4..d7f6308ac0b0 100644 --- a/test/CodeGen/Mips/fmadd1.ll +++ b/test/CodeGen/Mips/fmadd1.ll @@ -5,52 +5,63 @@ ; IEEE 754 (1985) and IEEE 754 (2008). These instructions are therefore only ; available when -enable-no-nans-fp-math is given. -; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32,32-NONAN +; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NONAN-NOMADD ; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R2,32R2-NONAN -; RUN: llc < %s -march=mipsel -mcpu=mips32r6 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R6,32R6-NONAN +; RUN: llc < %s -march=mipsel -mcpu=mips32r6 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R6-NOMADD,32R6-NONAN-NOMADD ; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64,64-NONAN ; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R2,64R2-NONAN -; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R6,64R6-NONAN -; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefixes=ALL,32,32-NAN +; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD +; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NAN-NOMADD ; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefixes=ALL,32R2,32R2-NAN -; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefixes=ALL,32R6,32R6-NAN +; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefixes=ALL,32R6-NOMADD,32R6-NAN-NOMADD ; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64,64-NAN ; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R2,64R2-NAN -; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R6,64R6-NAN +; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD + +; Check that madd.[ds], msub.[ds], nmadd.[ds], and nmsub.[ds] are not generated +; when +nomadd attribute is specified. +; Output for mips32 and mips64r6 reused since aforementioned instructions are +; not generated in those cases. +; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NONAN-NOMADD +; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD +; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD +; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NAN-NOMADD +; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD +; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD define float @FOO0float(float %a, float %b, float %c) nounwind readnone { entry: ; ALL-LABEL: FOO0float: -; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]] -; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: add.s $f0, $[[T1]], $[[T2]] +; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]] +; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]] -; 32R2: mtc1 $6, $[[T0:f[0-9]+]] -; 32R2: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2: add.s $f0, $[[T1]], $[[T2]] +; 32R2: mtc1 $6, $[[T0:f[0-9]+]] +; 32R2: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2: add.s $f0, $[[T1]], $[[T2]] -; 32R6-DAG: mtc1 $6, $[[T0:f[0-9]+]] -; 32R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 -; 32R6-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]] +; 32R6-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]] +; 32R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 +; 32R6-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]] -; 64-DAG: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-DAG: add.s $f0, $[[T0]], $[[T1]] +; 64-DAG: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.s $f0, $[[T0]], $[[T1]] -; 64R2: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2: add.s $f0, $[[T0]], $[[T1]] +; 64R2: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2: add.s $f0, $[[T0]], $[[T1]] -; 64R6-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13 -; 64R6-DAG: add.s $[[T1:f[0-9]+]], $[[T0]], $f14 -; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: add.s $f0, $[[T1]], $[[T2]] +; 64R6-NOMADD-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: add.s $[[T1:f[0-9]+]], $[[T0]], $f14 +; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]] %mul = fmul float %a, %b %add = fadd float %mul, %c @@ -62,35 +73,35 @@ define float @FOO1float(float %a, float %b, float %c) nounwind readnone { entry: ; ALL-LABEL: FOO1float: -; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]] -; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: add.s $f0, $[[T1]], $[[T2]] +; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]] +; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]] -; 32R2: mtc1 $6, $[[T0:f[0-9]+]] -; 32R2: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2: add.s $f0, $[[T1]], $[[T2]] +; 32R2: mtc1 $6, $[[T0:f[0-9]+]] +; 32R2: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2: add.s $f0, $[[T1]], $[[T2]] -; 32R6-DAG: mtc1 $6, $[[T0:f[0-9]+]] -; 32R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 -; 32R6-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R6-DAG: add.s $f0, $[[T1]], $[[T2]] +; 32R6-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]] +; 32R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 +; 32R6-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]] -; 64-DAG: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-DAG: add.s $f0, $[[T0]], $[[T1]] +; 64-DAG: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.s $f0, $[[T0]], $[[T1]] -; 64R2: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2: add.s $f0, $[[T0]], $[[T1]] +; 64R2: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2: add.s $f0, $[[T0]], $[[T1]] -; 64R6-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13 -; 64R6-DAG: sub.s $[[T1:f[0-9]+]], $[[T0]], $f14 -; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: add.s $f0, $[[T1]], $[[T2]] +; 64R6-NOMADD-DAG: mul.s $[[T0:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: sub.s $[[T1:f[0-9]+]], $[[T0]], $f14 +; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: add.s $f0, $[[T1]], $[[T2]] %mul = fmul float %a, %b %sub = fsub float %mul, %c @@ -102,42 +113,42 @@ define float @FOO2float(float %a, float %b, float %c) nounwind readnone { entry: ; ALL-LABEL: FOO2float: -; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]] -; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: sub.s $f0, $[[T2]], $[[T1]] +; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]] +; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]] -; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]] -; 32R2-NONAN: nmadd.s $f0, $[[T0]], $f12, $f14 +; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]] +; 32R2-NONAN: nmadd.s $f0, $[[T0]], $f12, $f14 -; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]] -; 32R2-NAN: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]] +; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]] +; 32R2-NAN: madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]] -; 32R6-DAG: mtc1 $6, $[[T0:f[0-9]+]] -; 32R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 -; 32R6-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R6-DAG: sub.s $f0, $[[T2]], $[[T1]] +; 32R6-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]] +; 32R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 +; 32R6-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R6-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]] -; 64-NONAN: nmadd.s $f0, $f14, $f12, $f13 +; 64-NONAN: nmadd.s $f0, $f14, $f12, $f13 -; 64-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-NAN: sub.s $f0, $[[T1]], $[[T0]] +; 64-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.s $f0, $[[T1]], $[[T0]] -; 64R2-NONAN: nmadd.s $f0, $f14, $f12, $f13 +; 64R2-NONAN: nmadd.s $f0, $f14, $f12, $f13 -; 64R2-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]] +; 64R2-NAN: madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]] -; 64R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 -; 64R6-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: sub.s $f0, $[[T2]], $[[T1]] +; 64R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: add.s $[[T2:f[0-9]+]], $[[T1]], $f14 +; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]] %mul = fmul float %a, %b %add = fadd float %mul, %c @@ -149,34 +160,34 @@ define float @FOO3float(float %a, float %b, float %c) nounwind readnone { entry: ; ALL-LABEL: FOO3float: -; 32-DAG: mtc1 $6, $[[T0:f[0-9]+]] -; 32-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: sub.s $f0, $[[T2]], $[[T1]] +; 32-NOMADD-DAG: mtc1 $6, $[[T0:f[0-9]+]] +; 32-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]] -; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]] -; 32R2-NONAN: nmsub.s $f0, $[[T0]], $f12, $f14 +; 32R2-NONAN: mtc1 $6, $[[T0:f[0-9]+]] +; 32R2-NONAN: nmsub.s $f0, $[[T0]], $f12, $f14 -; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]] -; 32R2-NAN: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]] +; 32R2-NAN: mtc1 $6, $[[T0:f[0-9]+]] +; 32R2-NAN: msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2-NAN: sub.s $f0, $[[T2]], $[[T1]] -; 64-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-NAN: sub.s $f0, $[[T1]], $[[T0]] +; 64-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.s $f0, $[[T1]], $[[T0]] -; 64-NONAN: nmsub.s $f0, $f14, $f12, $f13 +; 64-NONAN: nmsub.s $f0, $f14, $f12, $f13 -; 64R2-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]] +; 64R2-NAN: msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2-NAN: sub.s $f0, $[[T1]], $[[T0]] -; 64R6-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 -; 64R6-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: sub.s $f0, $[[T2]], $[[T1]] +; 64R6-NOMADD-DAG: mul.s $[[T1:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: sub.s $[[T2:f[0-9]+]], $[[T1]], $f14 +; 64R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: sub.s $f0, $[[T2]], $[[T1]] %mul = fmul float %a, %b %sub = fsub float %mul, %c @@ -188,36 +199,36 @@ define double @FOO10double(double %a, double %b, double %c) nounwind readnone { entry: ; ALL-LABEL: FOO10double: -; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: add.d $f0, $[[T1]], $[[T2]] +; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]] -; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R2: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2: mthc1 $zero, $[[T2]] -; 32R2: add.d $f0, $[[T1]], $[[T2]] +; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R2: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2: mthc1 $zero, $[[T2]] +; 32R2: add.d $f0, $[[T1]], $[[T2]] -; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]] +; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]] -; 64-DAG: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-DAG: add.d $f0, $[[T0]], $[[T1]] +; 64-DAG: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.d $f0, $[[T0]], $[[T1]] -; 64R2: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2: add.d $f0, $[[T0]], $[[T1]] +; 64R2: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2: add.d $f0, $[[T0]], $[[T1]] -; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: add.d $f0, $[[T1]], $[[T2]] +; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14 +; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]] %mul = fmul double %a, %b %add = fadd double %mul, %c @@ -229,36 +240,36 @@ define double @FOO11double(double %a, double %b, double %c) nounwind readnone { entry: ; ALL-LABEL: FOO11double: -; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: add.d $f0, $[[T1]], $[[T2]] +; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]] -; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R2: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2: mthc1 $zero, $[[T2]] -; 32R2: add.d $f0, $[[T1]], $[[T2]] +; 32R2: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R2: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2: mthc1 $zero, $[[T2]] +; 32R2: add.d $f0, $[[T1]], $[[T2]] -; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R6-DAG: add.d $f0, $[[T1]], $[[T2]] +; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]] -; 64-DAG: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-DAG: add.d $f0, $[[T0]], $[[T1]] +; 64-DAG: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-DAG: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-DAG: add.d $f0, $[[T0]], $[[T1]] -; 64R2: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2: add.d $f0, $[[T0]], $[[T1]] +; 64R2: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2: add.d $f0, $[[T0]], $[[T1]] -; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: add.d $f0, $[[T1]], $[[T2]] +; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14 +; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: add.d $f0, $[[T1]], $[[T2]] %mul = fmul double %a, %b %sub = fsub double %mul, %c @@ -270,43 +281,43 @@ define double @FOO12double(double %a, double %b, double %c) nounwind readnone { entry: ; ALL-LABEL: FOO12double: -; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: sub.d $f0, $[[T2]], $[[T1]] +; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]] -; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R2-NONAN: nmadd.d $f0, $[[T0]], $f12, $f14 +; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R2-NONAN: nmadd.d $f0, $[[T0]], $f12, $f14 -; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R2-NAN: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2-NAN: mthc1 $zero, $[[T2]] -; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]] +; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R2-NAN: madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2-NAN: mthc1 $zero, $[[T2]] +; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]] -; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]] +; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]] -; 64-NONAN: nmadd.d $f0, $f14, $f12, $f13 +; 64-NONAN: nmadd.d $f0, $f14, $f12, $f13 -; 64-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-NAN: sub.d $f0, $[[T1]], $[[T0]] +; 64-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.d $f0, $[[T1]], $[[T0]] -; 64R2-NONAN: nmadd.d $f0, $f14, $f12, $f13 +; 64R2-NONAN: nmadd.d $f0, $f14, $f12, $f13 -; 64R2-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]] +; 64R2-NAN: madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]] -; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64R6-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: sub.d $f0, $[[T2]], $[[T1]] +; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: add.d $[[T2:f[0-9]+]], $[[T1]], $f14 +; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]] %mul = fmul double %a, %b %add = fadd double %mul, %c @@ -318,43 +329,43 @@ define double @FOO13double(double %a, double %b, double %c) nounwind readnone { entry: ; ALL-LABEL: FOO13double: -; 32-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32-DAG: sub.d $f0, $[[T2]], $[[T1]] +; 32-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]] -; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R2-NONAN: nmsub.d $f0, $[[T0]], $f12, $f14 +; 32R2-NONAN: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R2-NONAN: nmsub.d $f0, $[[T0]], $f12, $f14 -; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R2-NAN: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 -; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R2-NAN: mthc1 $zero, $[[T2]] -; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]] +; 32R2-NAN: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R2-NAN: msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14 +; 32R2-NAN: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R2-NAN: mthc1 $zero, $[[T2]] +; 32R2-NAN: sub.d $f0, $[[T2]], $[[T1]] -; 32R6-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) -; 32R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 -; 32R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] -; 32R6-DAG: mtc1 $zero, $[[T2:f[0-9]+]] -; 32R6-DAG: sub.d $f0, $[[T2]], $[[T1]] +; 32R6-NOMADD-DAG: ldc1 $[[T0:f[0-9]+]], 16($sp) +; 32R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f14 +; 32R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]] +; 32R6-NOMADD-DAG: mtc1 $zero, $[[T2:f[0-9]+]] +; 32R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]] -; 64-NONAN: nmsub.d $f0, $f14, $f12, $f13 +; 64-NONAN: nmsub.d $f0, $f14, $f12, $f13 -; 64-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64-NAN: sub.d $f0, $[[T1]], $[[T0]] +; 64-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64-NAN: sub.d $f0, $[[T1]], $[[T0]] -; 64R2-NONAN: nmsub.d $f0, $f14, $f12, $f13 +; 64R2-NONAN: nmsub.d $f0, $f14, $f12, $f13 -; 64R2-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 -; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] -; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]] +; 64R2-NAN: msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13 +; 64R2-NAN: mtc1 $zero, $[[T1:f[0-9]+]] +; 64R2-NAN: sub.d $f0, $[[T1]], $[[T0]] -; 64R6-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 -; 64R6-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14 -; 64R6-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] -; 64R6-DAG: sub.d $f0, $[[T2]], $[[T1]] +; 64R6-NOMADD-DAG: mul.d $[[T1:f[0-9]+]], $f12, $f13 +; 64R6-NOMADD-DAG: sub.d $[[T2:f[0-9]+]], $[[T1]], $f14 +; 64R6-NOMADD-DAG: dmtc1 $zero, $[[T2:f[0-9]+]] +; 64R6-NOMADD-DAG: sub.d $f0, $[[T2]], $[[T1]] %mul = fmul double %a, %b %sub = fsub double %mul, %c diff --git a/test/CodeGen/Mips/llvm-ir/mul.ll b/test/CodeGen/Mips/llvm-ir/mul.ll index 20853073dfa6..1562372ce9a0 100644 --- a/test/CodeGen/Mips/llvm-ir/mul.ll +++ b/test/CodeGen/Mips/llvm-ir/mul.ll @@ -268,7 +268,7 @@ entry: ; MM64R6: daddu $2, $[[T1]], $[[T0]] ; MM64R6-DAG: dmul $3, $5, $7 - ; MM32: lw $25, %call16(__multi3)($gp) + ; MM32: lw $25, %call16(__multi3)($16) %r = mul i128 %a, %b ret i128 %r diff --git a/test/CodeGen/Mips/llvm-ir/sdiv.ll b/test/CodeGen/Mips/llvm-ir/sdiv.ll index ee2b212a9f2f..defd25bb41ac 100644 --- a/test/CodeGen/Mips/llvm-ir/sdiv.ll +++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll @@ -172,7 +172,7 @@ entry: ; 64R6: ddiv $2, $4, $5 ; 64R6: teq $5, $zero, 7 - ; MM32: lw $25, %call16(__divdi3)($gp) + ; MM32: lw $25, %call16(__divdi3)($2) ; MM64: ddiv $2, $4, $5 ; MM64: teq $5, $zero, 7 @@ -184,7 +184,15 @@ entry: define signext i128 @sdiv_i128(i128 signext %a, i128 signext %b) { entry: ; ALL-LABEL: sdiv_i128: - ; ALL: l{{w|d}} $25, %call16(__divti3)($gp) + + ; GP32: lw $25, %call16(__divti3)($gp) + + ; GP64-NOT-R6: ld $25, %call16(__divti3)($gp) + ; 64R6: ld $25, %call16(__divti3)($gp) + + ; MM32: lw $25, %call16(__divti3)($16) + + ; MM64: ld $25, %call16(__divti3)($2) %r = sdiv i128 %a, %b ret i128 %r diff --git a/test/CodeGen/Mips/llvm-ir/srem.ll b/test/CodeGen/Mips/llvm-ir/srem.ll index 812c10566979..42664d7457e5 100644 --- a/test/CodeGen/Mips/llvm-ir/srem.ll +++ b/test/CodeGen/Mips/llvm-ir/srem.ll @@ -164,7 +164,7 @@ entry: ; 64R6: dmod $2, $4, $5 ; 64R6: teq $5, $zero, 7 - ; MM32: lw $25, %call16(__moddi3)($gp) + ; MM32: lw $25, %call16(__moddi3)($2) ; MM64: dmod $2, $4, $5 ; MM64: teq $5, $zero, 7 @@ -177,7 +177,14 @@ define signext i128 @srem_i128(i128 signext %a, i128 signext %b) { entry: ; ALL-LABEL: srem_i128: - ; ALL: l{{w|d}} $25, %call16(__modti3)($gp) + ; GP32: lw $25, %call16(__modti3)($gp) + + ; GP64-NOT-R6: ld $25, %call16(__modti3)($gp) + ; 64R6: ld $25, %call16(__modti3)($gp) + + ; MM32: lw $25, %call16(__modti3)($16) + + ; MM64: ld $25, %call16(__modti3)($2) %r = srem i128 %a, %b ret i128 %r diff --git a/test/CodeGen/Mips/llvm-ir/udiv.ll b/test/CodeGen/Mips/llvm-ir/udiv.ll index 6e078fdedfca..78ab36442a9a 100644 --- a/test/CodeGen/Mips/llvm-ir/udiv.ll +++ b/test/CodeGen/Mips/llvm-ir/udiv.ll @@ -134,7 +134,7 @@ entry: ; 64R6: ddivu $2, $4, $5 ; 64R6: teq $5, $zero, 7 - ; MM32: lw $25, %call16(__udivdi3)($gp) + ; MM32: lw $25, %call16(__udivdi3)($2) ; MM64: ddivu $2, $4, $5 ; MM64: teq $5, $zero, 7 @@ -147,7 +147,14 @@ define signext i128 @udiv_i128(i128 signext %a, i128 signext %b) { entry: ; ALL-LABEL: udiv_i128: - ; ALL: l{{w|d}} $25, %call16(__udivti3)($gp) + ; GP32: lw $25, %call16(__udivti3)($gp) + + ; GP64-NOT-R6: ld $25, %call16(__udivti3)($gp) + ; 64-R6: ld $25, %call16(__udivti3)($gp) + + ; MM32: lw $25, %call16(__udivti3)($16) + + ; MM64: ld $25, %call16(__udivti3)($2) %r = udiv i128 %a, %b ret i128 %r diff --git a/test/CodeGen/Mips/llvm-ir/urem.ll b/test/CodeGen/Mips/llvm-ir/urem.ll index 3bc82ceecd2a..160c126c7e3a 100644 --- a/test/CodeGen/Mips/llvm-ir/urem.ll +++ b/test/CodeGen/Mips/llvm-ir/urem.ll @@ -190,7 +190,7 @@ entry: ; 64R6: dmodu $2, $4, $5 ; 64R6: teq $5, $zero, 7 - ; MM32: lw $25, %call16(__umoddi3)($gp) + ; MM32: lw $25, %call16(__umoddi3)($2) ; MM64: dmodu $2, $4, $5 ; MM64: teq $5, $zero, 7 @@ -208,9 +208,9 @@ entry: ; GP64-NOT-R6: ld $25, %call16(__umodti3)($gp) ; 64R6: ld $25, %call16(__umodti3)($gp) - ; MM32: lw $25, %call16(__umodti3)($gp) + ; MM32: lw $25, %call16(__umodti3)($16) - ; MM64: ld $25, %call16(__umodti3)($gp) + ; MM64: ld $25, %call16(__umodti3)($2) %r = urem i128 %a, %b ret i128 %r diff --git a/test/CodeGen/Mips/micromips-gp-rc.ll b/test/CodeGen/Mips/micromips-gp-rc.ll index 16e55c357db6..f139f7a8486d 100644 --- a/test/CodeGen/Mips/micromips-gp-rc.ll +++ b/test/CodeGen/Mips/micromips-gp-rc.ll @@ -14,5 +14,5 @@ entry: ; Function Attrs: noreturn declare void @exit(i32 signext) -; CHECK: addu $gp, ${{[0-9]+}} +; CHECK: move $gp, ${{[0-9]+}} diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll index 6fa506849ee6..564ffdd2f691 100644 --- a/test/CodeGen/Mips/mips64fpldst.ll +++ b/test/CodeGen/Mips/mips64fpldst.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64 -; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32 -; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64 -; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32 -; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32 -; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64 +; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64 +; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32 +; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64 +; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32 +; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32 +; RUN: llc < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64 @f0 = common global float 0.000000e+00, align 4 @d0 = common global double 0.000000e+00, align 8 diff --git a/test/CodeGen/Mips/pbqp-reserved-physreg.ll b/test/CodeGen/Mips/pbqp-reserved-physreg.ll new file mode 100644 index 000000000000..eedc51bd1e57 --- /dev/null +++ b/test/CodeGen/Mips/pbqp-reserved-physreg.ll @@ -0,0 +1,35 @@ +; RUN: llc -march=mips -regalloc=pbqp <%s > %t +; ModuleID = 'bugpoint-reduced-simplified.bc' + +; Function Attrs: nounwind +define void @ham.928() local_unnamed_addr #0 align 2 { +bb: + switch i32 undef, label %bb35 [ + i32 1, label %bb18 + i32 0, label %bb19 + i32 3, label %bb20 + i32 2, label %bb21 + i32 4, label %bb17 + ] + +bb17: ; preds = %bb + unreachable + +bb18: ; preds = %bb + unreachable + +bb19: ; preds = %bb + unreachable + +bb20: ; preds = %bb + unreachable + +bb21: ; preds = %bb + unreachable + +bb35: ; preds = %bb + unreachable +} + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + diff --git a/test/CodeGen/Mips/return-vector.ll b/test/CodeGen/Mips/return-vector.ll index 08eddf370096..c59695d18734 100644 --- a/test/CodeGen/Mips/return-vector.ll +++ b/test/CodeGen/Mips/return-vector.ll @@ -128,8 +128,11 @@ entry: ; CHECK-LABEL: call_f2: ; CHECK: call16(f2) -; CHECK-NOT: lwc1 -; CHECK: add.s $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]] +; CHECK: addiu $4, $sp, [[O0:[0-9]+]] +; CHECK-DAG: lwc1 $f[[F0:[0-9]]], [[O0]]($sp) +; CHECK-DAG: lwc1 $f[[F1:[0-9]]], 20($sp) +; CHECK: add.s $f0, $f[[F0]], $f[[F1]] + } @@ -143,12 +146,13 @@ entry: ; CHECK-LABEL: call_d2: ; CHECK: call16(d2) -; CHECK-NOT: ldc1 -; CHECK: add.d $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]] +; CHECK: addiu $4, $sp, [[O0:[0-9]+]] +; CHECK-DAG: ldc1 $f[[F0:[0-9]+]], 24($sp) +; CHECK-DAG: ldc1 $f[[F1:[0-9]+]], [[O0]]($sp) +; CHECK: add.d $f0, $f[[F1]], $f[[F0]] + } - - ; Check that function returns vector on stack in cases when vector can't be ; returned in registers. Also check that vector is placed on stack starting ; from the address in register $4. @@ -179,11 +183,12 @@ entry: ret <4 x float> %vecins4 ; CHECK-LABEL: return_f4: -; CHECK-DAG: lwc1 $[[R0:[a-z0-9]+]], 16($sp) -; CHECK-DAG: swc1 $[[R0]], 12($4) +; CHECK-DAG: lwc1 $f[[R0:[0-9]+]], 16($sp) +; CHECK-DAG: swc1 $f[[R0]], 12($4) ; CHECK-DAG: sw $7, 8($4) ; CHECK-DAG: sw $6, 4($4) ; CHECK-DAG: sw $5, 0($4) + } @@ -227,8 +232,8 @@ entry: ret <2 x float> %vecins2 ; CHECK-LABEL: return_f2: -; CHECK: mov.s $f0, $f12 -; CHECK: mov.s $f2, $f14 +; CHECK-DAG: sw $5, 0($4) +; CHECK-DAG: sw $6, 4($4) } @@ -239,6 +244,10 @@ entry: ret <2 x double> %vecins2 ; CHECK-LABEL: return_d2: -; CHECK: mov.d $f0, $f12 -; CHECK: mov.d $f2, $f14 +; CHECK-DAG: ldc1 $f[[F0:[0-9]]], 16($sp) +; CHECK-DAG: sdc1 $f[[F0]], 8($4) +; CHECK-DAG: mtc1 $6, $f[[F1:[0-9]+]] +; CHECK-DAG: mtc1 $7, $f +; CHECK-DAG: sdc1 $f[[F0]], 0($4) + } diff --git a/test/CodeGen/Mips/tailcall/tailcall.ll b/test/CodeGen/Mips/tailcall/tailcall.ll index 01a9b64ba63c..3f04e1cf3053 100644 --- a/test/CodeGen/Mips/tailcall/tailcall.ll +++ b/test/CodeGen/Mips/tailcall/tailcall.ll @@ -176,7 +176,7 @@ entry: ; ALL-LABEL: caller8_1: ; PIC32: jalr $25 ; PIC32R6: jalr $25 -; PIC32MM: jalr{{.*}} $25 +; PIC32MM: jalr $25 ; STATIC32: jal ; PIC64: jalr $25 ; STATIC64: jal @@ -288,7 +288,7 @@ entry: ; ALL-LABEL: caller13: ; PIC32: jalr $25 ; PIC32R6: jalr $25 -; PIC32MM: jalr{{.*}} $25 +; PIC32MM: jalr $25 ; STATIC32: jal ; STATIC64: jal ; PIC64R6: jalr $25 diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll b/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll new file mode 100644 index 000000000000..14669b9005b7 --- /dev/null +++ b/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=32442 +; Don't generate zero extension for the return value. +; CHECK-NOT: clrldi + +define zeroext i1 @foo(i32 signext %i, i32* %p) { +entry: + %cmp = icmp eq i32 %i, 0 + br i1 %cmp, label %return, label %if.end + +if.end: + store i32 %i, i32* %p, align 4 + br label %return + +return: + %retval = phi i1 [ true, %if.end ], [ false, %entry ] + ret i1 %retval +} diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest.ll b/test/CodeGen/PowerPC/BoolRetToIntTest.ll index 4a0966b2859f..fd515281e394 100644 --- a/test/CodeGen/PowerPC/BoolRetToIntTest.ll +++ b/test/CodeGen/PowerPC/BoolRetToIntTest.ll @@ -31,14 +31,14 @@ for.body: ; preds = %for.body.preheader, br i1 %call, label %cleanup.loopexit, label %for.cond cleanup.loopexit: ; preds = %for.body, %for.cond -; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] +; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ] %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] br label %cleanup cleanup: ; preds = %cleanup.loopexit, %entry -; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] +; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] ret i1 %cleanup.dest.slot.0 } @@ -78,14 +78,14 @@ for.body: ; preds = %for.body.preheader, br i1 %call, label %cleanup.loopexit, label %for.cond cleanup.loopexit: ; preds = %for.body, %for.cond -; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] +; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ] %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] br label %cleanup cleanup: ; preds = %cleanup.loopexit, %entry -; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] +; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: call void %cont(i1 [[REG]] tail call void %cont(i1 %cleanup.dest.slot.0) ret void @@ -112,17 +112,17 @@ for.body: ; preds = %for.body.preheader, br i1 %call, label %cleanup.loopexit, label %for.cond cleanup.loopexit: ; preds = %for.body, %for.cond -; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] +; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ] %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] br label %cleanup cleanup: ; preds = %cleanup.loopexit, %entry -; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] +; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: call void %cont(i1 [[REG]] tail call void %cont(i1 %cleanup.dest.slot.0) -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] ret i1 %cleanup.dest.slot.0 } @@ -136,7 +136,7 @@ foo: br label %cleanup cleanup: -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] %result = phi i1 [ false, %foo ], [ %operand, %entry ] ret i1 %result @@ -186,7 +186,7 @@ foo: ; CHECK-LABEL: cleanup cleanup: -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] %result = phi i1 [ %bar, %foo], [ %operand, %entry ] ret i1 %result @@ -198,8 +198,8 @@ declare zeroext i1 @return_i1() define zeroext i1 @call_test() { ; CHECK: [[REG:%.+]] = call i1 %result = call i1 @return_i1() -; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i32 -; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i64 +; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1 ; CHECK: ret i1 [[REG]] ret i1 %result } diff --git a/test/CodeGen/PowerPC/crbits.ll b/test/CodeGen/PowerPC/crbits.ll index a85237195c5e..4ae91d1163a4 100644 --- a/test/CodeGen/PowerPC/crbits.ll +++ b/test/CodeGen/PowerPC/crbits.ll @@ -94,13 +94,15 @@ entry: ret i1 %or7 ; CHECK-LABEL: @test5 +; CHECK-DAG: li [[NEG2:[0-9]+]], -2 ; CHECK-DAG: and [[REG1:[0-9]+]], 3, 4 -; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2 -; CHECK-DAG: li [[REG3:[0-9]+]], 1 -; CHECK-DAG: andi. {{[0-9]+}}, [[REG1]], 1 -; CHECK-DAG: crandc [[REG5:[0-9]+]], -; CHECK: isel 3, 0, [[REG3]], [[REG5]] -; CHECK: blr +; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]] +; CHECK-DAG: clrldi [[TRUNC:[0-9]+]], [[REG1]], 63 +; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]] +; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5 +; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1 +; CHECK: or 3, [[TRUNC]], [[NE4]] +; CHECK-NEXT: blr } ; Function Attrs: nounwind readnone @@ -112,15 +114,16 @@ entry: ret i1 %and7 ; CHECK-LABEL: @test6 -; CHECK-DAG: andi. {{[0-9]+}}, 3, 1 -; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2 -; CHECK-DAG: crmove [[REG1:[0-9]+]], 1 -; CHECK-DAG: andi. {{[0-9]+}}, 4, 1 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG4:[0-9]+]], 1, -; CHECK-DAG: crnand [[REG5:[0-9]+]], [[REG4]], [[REG1]] -; CHECK: isel 3, 0, [[REG2]], [[REG5]] -; CHECK: blr +; CHECK-DAG: li [[NEG2:[0-9]+]], -2 +; CHECK-DAG: clrldi [[CLR1:[0-9]+]], 4, 63 +; CHECK-DAG: clrldi [[CLR2:[0-9]+]], 3, 63 +; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]] +; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]] +; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5 +; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1 +; CHECK: or [[OR:[0-9]+]], [[NE4]], [[CLR1]] +; CHECK: and 3, [[OR]], [[CLR2]] +; CHECK-NEXT: blr } ; Function Attrs: nounwind readnone @@ -187,12 +190,13 @@ entry: ret i32 %and ; CHECK-LABEL: @test10 -; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0 -; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG3:[0-9]+]], -; CHECK: isel 3, 0, [[REG2]], [[REG3]] -; CHECK: blr +; CHECK-DAG: cntlzw 3, 3 +; CHECK-DAG: cntlzw 4, 4 +; CHECK-DAG: srwi 3, 3, 5 +; CHECK-DAG: srwi 4, 4, 5 +; CHECK: xori 3, 3, 1 +; CHECK: and 3, 3, 4 +; CHECK-NEXT: blr } attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/PowerPC/logic-ops-on-compares.ll b/test/CodeGen/PowerPC/logic-ops-on-compares.ll index df021c20ea86..5a507e9ff678 100644 --- a/test/CodeGen/PowerPC/logic-ops-on-compares.ll +++ b/test/CodeGen/PowerPC/logic-ops-on-compares.ll @@ -40,8 +40,8 @@ return: ; preds = %if.end, %if.then ret i32 %retval.0 } -define void @neg_truncate_i32(i32 *%ptr) { -; CHECK-LABEL: neg_truncate_i32: +define void @neg_truncate_i32_eq(i32 *%ptr) { +; CHECK-LABEL: neg_truncate_i32_eq: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: lwz r3, 0(r3) ; CHECK-NEXT: rldicl. r3, r3, 0, 63 @@ -66,8 +66,8 @@ if.end29: ; preds = %if.else } ; Function Attrs: nounwind -define i64 @logic_ne_64(i64 %a, i64 %b, i64 %c) { -; CHECK-LABEL: logic_ne_64: +define i64 @logic_eq_64(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: logic_eq_64: ; CHECK: xor r7, r3, r4 ; CHECK-NEXT: li r6, 55 ; CHECK-NEXT: xor r5, r5, r6 @@ -99,8 +99,8 @@ return: ; preds = %if.end, %if.then ret i64 %retval.0 } -define void @neg_truncate_i64(i64 *%ptr) { -; CHECK-LABEL: neg_truncate_i64: +define void @neg_truncate_i64_eq(i64 *%ptr) { +; CHECK-LABEL: neg_truncate_i64_eq: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: rldicl. r3, r3, 0, 63 @@ -124,6 +124,67 @@ if.end29: ; preds = %if.else } +; Function Attrs: nounwind +define i64 @logic_ne_64(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: logic_ne_64: +; CHECK: xor r7, r3, r4 +; CHECK-NEXT: li r6, 55 +; CHECK-NEXT: addic r8, r7, -1 +; CHECK-NEXT: xor r5, r5, r6 +; CHECK-NEXT: subfe r7, r8, r7 +; CHECK-NEXT: cntlzd r5, r5 +; CHECK-NEXT: addic r12, r4, -1 +; CHECK-NEXT: rldicl r5, r5, 58, 63 +; CHECK-NEXT: subfe r6, r12, r4 +; CHECK-NEXT: and r6, r7, r6 +; CHECK-NEXT: or. r5, r6, r5 +; CHECK-NEXT: bc 4, 1 +entry: + %tobool = icmp ne i64 %a, %b + %tobool1 = icmp ne i64 %b, 0 + %or.cond = and i1 %tobool, %tobool1 + %tobool3 = icmp eq i64 %c, 55 + %or.cond5 = or i1 %or.cond, %tobool3 + br i1 %or.cond5, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call i64 @foo64(i64 %a) #2 + br label %return + +if.end: ; preds = %entry + %call4 = tail call i64 @bar64(i64 %b) #2 + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i64 [ %call4, %if.end ], [ %call, %if.then ] + ret i64 %retval.0 +} + +define void @neg_truncate_i64_ne(i64 *%ptr) { +; CHECK-LABEL: neg_truncate_i64_ne: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: andi. r3, r3, 1 +; CHECK-NEXT: bclr 12, 1, 0 +; CHECK-NEXT: # BB#1: # %if.end29.thread136 +; CHECK-NEXT: .LBB5_2: # %if.end29 +entry: + %0 = load i64, i64* %ptr, align 4 + %rem17127 = and i64 %0, 1 + %cmp18 = icmp ne i64 %rem17127, 0 + br label %if.else + +if.else: ; preds = %entry + br i1 %cmp18, label %if.end29, label %if.end29.thread136 + +if.end29.thread136: ; preds = %if.else + unreachable + +if.end29: ; preds = %if.else + ret void + +} + declare signext i32 @foo(i32 signext) declare signext i32 @bar(i32 signext) declare i64 @foo64(i64) diff --git a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll index 3095429758f6..ad9078c82066 100644 --- a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" @@ -11,111 +12,237 @@ target triple = "powerpc64le-unknown-linux-gnu" @zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4 @zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4 -; Function Attrs: nounwind readonly declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1 -; Validate with if(memcmp()) -; Function Attrs: nounwind readonly -define signext i32 @zeroEqualityTest01() local_unnamed_addr #0 { -entry: - %call = tail call signext i32 @memcmp(i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer1 to i8*), i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer2 to i8*), i64 16) - %not.tobool = icmp ne i32 %call, 0 - %. = zext i1 %not.tobool to i32 - ret i32 %. - - ; CHECK-LABEL: @zeroEqualityTest01 - ; CHECK-LABEL: %res_block - ; CHECK: li 3, 1 - ; CHECK-NEXT: clrldi - ; CHECK-NEXT: blr - ; CHECK: li 3, 0 - ; CHECK-NEXT: clrldi - ; CHECK-NEXT: blr -} - -; Validate with if(memcmp() == 0) -; Function Attrs: nounwind readonly -define signext i32 @zeroEqualityTest02() local_unnamed_addr #0 { -entry: - %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16) +; Check 4 bytes - requires 1 load for each param. +define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) { +; CHECK-LABEL: zeroEqualityTest02: +; CHECK: # BB#0: +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: lwz 4, 0(4) +; CHECK-NEXT: xor 3, 3, 4 +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 +; CHECK-NEXT: blr + %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 4) %not.cmp = icmp ne i32 %call, 0 %. = zext i1 %not.cmp to i32 ret i32 %. +} - ; CHECK-LABEL: @zeroEqualityTest02 - ; CHECK-LABEL: %res_block - ; CHECK: li 3, 1 - ; CHECK-NEXT: clrldi - ; CHECK-NEXT: blr - ; CHECK: li 3, 0 - ; CHECK-NEXT: clrldi - ; CHECK-NEXT: blr +; Check 16 bytes - requires 2 loads for each param (or use vectors?). +define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) { +; CHECK-LABEL: zeroEqualityTest01: +; CHECK: # BB#0: # %loadbb +; CHECK-NEXT: ld 5, 0(3) +; CHECK-NEXT: ld 6, 0(4) +; CHECK-NEXT: cmpld 5, 6 +; CHECK-NEXT: bne 0, .LBB1_2 +; CHECK-NEXT: # BB#1: # %loadbb1 +; CHECK-NEXT: ld 3, 8(3) +; CHECK-NEXT: ld 4, 8(4) +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: beq 0, .LBB1_3 +; CHECK-NEXT: .LBB1_2: # %res_block +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_3: # %endblock +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr + %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16) + %not.tobool = icmp ne i32 %call, 0 + %. = zext i1 %not.tobool to i32 + ret i32 %. +} + +; Check 7 bytes - requires 3 loads for each param. +define signext i32 @zeroEqualityTest03(i8* %x, i8* %y) { +; CHECK-LABEL: zeroEqualityTest03: +; CHECK: # BB#0: # %loadbb +; CHECK-NEXT: lwz 5, 0(3) +; CHECK-NEXT: lwz 6, 0(4) +; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: bne 0, .LBB2_3 +; CHECK-NEXT: # BB#1: # %loadbb1 +; CHECK-NEXT: lhz 5, 4(3) +; CHECK-NEXT: lhz 6, 4(4) +; CHECK-NEXT: cmplw 5, 6 +; CHECK-NEXT: bne 0, .LBB2_3 +; CHECK-NEXT: # BB#2: # %loadbb2 +; CHECK-NEXT: lbz 3, 6(3) +; CHECK-NEXT: lbz 4, 6(4) +; CHECK-NEXT: cmplw 3, 4 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: beq 0, .LBB2_4 +; CHECK-NEXT: .LBB2_3: # %res_block +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_4: # %endblock +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr + %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7) + %not.lnot = icmp ne i32 %call, 0 + %cond = zext i1 %not.lnot to i32 + ret i32 %cond } ; Validate with > 0 -; Function Attrs: nounwind readonly -define signext i32 @zeroEqualityTest03() local_unnamed_addr #0 { -entry: +define signext i32 @zeroEqualityTest04() { +; CHECK-LABEL: zeroEqualityTest04: +; CHECK: # BB#0: # %loadbb +; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha +; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha +; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l +; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l +; CHECK-NEXT: ldbrx 3, 0, 6 +; CHECK-NEXT: ldbrx 4, 0, 5 +; CHECK-NEXT: subf. 7, 4, 3 +; CHECK-NEXT: bne 0, .LBB3_2 +; CHECK-NEXT: # BB#1: # %loadbb1 +; CHECK-NEXT: li 4, 8 +; CHECK-NEXT: ldbrx 3, 6, 4 +; CHECK-NEXT: ldbrx 4, 5, 4 +; CHECK-NEXT: subf. 5, 4, 3 +; CHECK-NEXT: beq 0, .LBB3_4 +; CHECK-NEXT: .LBB3_2: # %res_block +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: li 12, -1 +; CHECK-NEXT: isel 3, 12, 3, 0 +; CHECK-NEXT: .LBB3_3: # %endblock +; CHECK-NEXT: cmpwi 3, 1 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: li 4, 1 +; CHECK-NEXT: isel 3, 4, 3, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: b .LBB3_3 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16) %not.cmp = icmp slt i32 %call, 1 %. = zext i1 %not.cmp to i32 ret i32 %. - - ; CHECK-LABEL: @zeroEqualityTest03 - ; CHECK-LABEL: %res_block - ; CHECK: cmpld - ; CHECK-NEXT: li [[LI:[0-9]+]], 1 - ; CHECK-NEXT: li [[LI2:[0-9]+]], -1 - ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0 } ; Validate with < 0 -; Function Attrs: nounwind readonly -define signext i32 @zeroEqualityTest04() local_unnamed_addr #0 { -entry: +define signext i32 @zeroEqualityTest05() { +; CHECK-LABEL: zeroEqualityTest05: +; CHECK: # BB#0: # %loadbb +; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha +; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha +; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l +; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l +; CHECK-NEXT: ldbrx 3, 0, 6 +; CHECK-NEXT: ldbrx 4, 0, 5 +; CHECK-NEXT: subf. 7, 4, 3 +; CHECK-NEXT: bne 0, .LBB4_2 +; CHECK-NEXT: # BB#1: # %loadbb1 +; CHECK-NEXT: li 4, 8 +; CHECK-NEXT: ldbrx 3, 6, 4 +; CHECK-NEXT: ldbrx 4, 5, 4 +; CHECK-NEXT: subf. 5, 4, 3 +; CHECK-NEXT: beq 0, .LBB4_4 +; CHECK-NEXT: .LBB4_2: # %res_block +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: li 12, -1 +; CHECK-NEXT: isel 3, 12, 3, 0 +; CHECK-NEXT: .LBB4_3: # %endblock +; CHECK-NEXT: srwi 3, 3, 31 +; CHECK-NEXT: xori 3, 3, 1 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: b .LBB4_3 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16) %call.lobit = lshr i32 %call, 31 %call.lobit.not = xor i32 %call.lobit, 1 ret i32 %call.lobit.not - - ; CHECK-LABEL: @zeroEqualityTest04 - ; CHECK-LABEL: %res_block - ; CHECK: cmpld - ; CHECK-NEXT: li [[LI:[0-9]+]], 1 - ; CHECK-NEXT: li [[LI2:[0-9]+]], -1 - ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0 } ; Validate with memcmp()?: -; Function Attrs: nounwind readonly -define signext i32 @zeroEqualityTest05() local_unnamed_addr #0 { -entry: +define signext i32 @equalityFoldTwoConstants() { +; CHECK-LABEL: equalityFoldTwoConstants: +; CHECK: # BB#0: # %loadbb +; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha +; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha +; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l(3) +; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l(4) +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: bne 0, .LBB5_2 +; CHECK-NEXT: # BB#1: # %loadbb1 +; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8 +; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8 +; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3) +; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4) +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: beq 0, .LBB5_3 +; CHECK-NEXT: .LBB5_2: # %res_block +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: .LBB5_3: # %endblock +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16) %not.tobool = icmp eq i32 %call, 0 %cond = zext i1 %not.tobool to i32 ret i32 %cond - - ; CHECK-LABEL: @zeroEqualityTest05 - ; CHECK-LABEL: %res_block - ; CHECK: li 3, 1 - ; CHECK: li 3, 0 } -; Validate with !memcmp()?: -; Function Attrs: nounwind readonly -define signext i32 @zeroEqualityTest06() local_unnamed_addr #0 { -entry: - %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16) - %not.lnot = icmp ne i32 %call, 0 - %cond = zext i1 %not.lnot to i32 +define signext i32 @equalityFoldOneConstant(i8* %X) { +; CHECK-LABEL: equalityFoldOneConstant: +; CHECK: # BB#0: # %loadbb +; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha +; CHECK-NEXT: ld 5, 0(3) +; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer1@toc@l(4) +; CHECK-NEXT: cmpld 4, 5 +; CHECK-NEXT: bne 0, .LBB6_2 +; CHECK-NEXT: # BB#1: # %loadbb1 +; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha+8 +; CHECK-NEXT: ld 3, 8(3) +; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer1@toc@l+8(4) +; CHECK-NEXT: cmpld 4, 3 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: beq 0, .LBB6_3 +; CHECK-NEXT: .LBB6_2: # %res_block +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: .LBB6_3: # %endblock +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: blr + %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* %X, i64 16) + %not.tobool = icmp eq i32 %call, 0 + %cond = zext i1 %not.tobool to i32 ret i32 %cond - - ; CHECK-LABEL: @zeroEqualityTest06 - ; CHECK-LABEL: %res_block - ; CHECK: li 3, 1 - ; CHECK-NEXT: clrldi - ; CHECK-NEXT: blr - ; CHECK: li 3, 0 - ; CHECK-NEXT: clrldi - ; CHECK-NEXT: blr } + +define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) { +; CHECK-LABEL: length2_eq_nobuiltin_attr: +; CHECK: # BB#0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -32(1) +; CHECK-NEXT: .Lcfi0: +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .Lcfi1: +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: li 5, 2 +; CHECK-NEXT: bl memcmp +; CHECK-NEXT: nop +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: addi 1, 1, 32 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %m = tail call signext i32 @memcmp(i8* %X, i8* %Y, i64 2) nobuiltin + %c = icmp eq i32 %m, 0 + ret i1 %c +} + diff --git a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll index 0e7f8f1bc668..c403b5ac2e5a 100644 --- a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll +++ b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll @@ -37,17 +37,13 @@ entry: ; CHECK-LABEL: @crbitson ; CHECK-NO-ISEL-LABEL: @crbitson -; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0 -; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0 -; CHECK-DAG: li [[REG2:[0-9]+]], 1 -; CHECK-DAG: crorc [[REG3:[0-9]+]], -; CHECK: isel 3, 0, [[REG2]], [[REG3]] -; CHECK-NO-ISEL: bc 12, 20, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL-NEXT: blr -; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL-NEXT: addi 3, 0, 0 -; CHECK-NO-ISEL-NEXT: blr -; CHECK: blr +; CHECK-DAG: cntlzw [[REG1:[0-9]+]], 3 +; CHECK-DAG: cntlzw [[REG2:[0-9]+]], 4 +; CHECK: srwi [[REG3:[0-9]+]], [[REG1]], 5 +; CHECK: srwi [[REG4:[0-9]+]], [[REG2]], 5 +; CHECK: xori [[REG5:[0-9]+]], [[REG3]], 1 +; CHECK: and 3, [[REG5]], [[REG4]] +; CHECK-NEXT: blr } diff --git a/test/CodeGen/PowerPC/setcc-logic.ll b/test/CodeGen/PowerPC/setcc-logic.ll index a5a86f101a94..8a6f4975ec97 100644 --- a/test/CodeGen/PowerPC/setcc-logic.ll +++ b/test/CodeGen/PowerPC/setcc-logic.ll @@ -59,8 +59,8 @@ define zeroext i1 @any_bits_set(i32 %P, i32 %Q) { ; CHECK: # BB#0: ; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp ne i32 %P, 0 %b = icmp ne i32 %Q, 0 @@ -83,10 +83,12 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q) { define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) { ; CHECK-LABEL: any_bits_clear: ; CHECK: # BB#0: +; CHECK-NEXT: li 5, -1 ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: cmpwi 0, 3, -1 -; CHECK-NEXT: isel 3, 0, 5, 2 +; CHECK-NEXT: xor 3, 3, 5 +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %a = icmp ne i32 %P, -1 %b = icmp ne i32 %Q, -1 @@ -452,8 +454,8 @@ define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: xor 3, 3, 4 ; CHECK-NEXT: or 3, 3, 5 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 +; CHECK-NEXT: srwi 3, 3, 5 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %cmp1 = icmp ne i32 %a, %b %cmp2 = icmp ne i32 %c, %d diff --git a/test/CodeGen/PowerPC/testComparesinesc.ll b/test/CodeGen/PowerPC/testComparesinesc.ll new file mode 100644 index 000000000000..e6ade339573b --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesinesc.ll @@ -0,0 +1,121 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i8 0, align 1 + +define signext i32 @test_inesc(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: test_inesc: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define signext i32 @test_inesc_sext(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: test_inesc_sext: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_inesc_z(i8 signext %a) { +; CHECK-LABEL: test_inesc_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +} + +define signext i32 @test_inesc_sext_z(i8 signext %a) { +; CHECK-LABEL: test_inesc_sext_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_inesc_store(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: test_inesc_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %conv3 = zext i1 %cmp to i8 + store i8 %conv3, i8* @glob, align 1 + ret void +} + +define void @test_inesc_sext_store(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: test_inesc_sext_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %conv3 = sext i1 %cmp to i8 + store i8 %conv3, i8* @glob, align 1 + ret void +} + +define void @test_inesc_z_store(i8 signext %a) { +; CHECK-LABEL: test_inesc_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %conv2 = zext i1 %cmp to i8 + store i8 %conv2, i8* @glob, align 1 + ret void +} + +define void @test_inesc_sext_z_store(i8 signext %a) { +; CHECK-LABEL: test_inesc_sext_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %conv2 = sext i1 %cmp to i8 + store i8 %conv2, i8* @glob, align 1 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesinesi.ll b/test/CodeGen/PowerPC/testComparesinesi.ll new file mode 100644 index 000000000000..ad9431c09e33 --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesinesi.ll @@ -0,0 +1,121 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i32 0, align 4 + +define signext i32 @test_inesi(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: test_inesi: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_inesi_sext(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: test_inesi_sext: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_inesi_z(i32 signext %a) { +; CHECK-LABEL: test_inesi_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_inesi_sext_z(i32 signext %a) { +; CHECK-LABEL: test_inesi_sext_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_inesi_store(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: test_inesi_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @glob, align 4 + ret void +} + +define void @test_inesi_sext_store(i32 signext %a, i32 signext %b) { +; CHECK-LABEL: test_inesi_sext_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %sub = sext i1 %cmp to i32 + store i32 %sub, i32* @glob, align 4 + ret void +} + +define void @test_inesi_z_store(i32 signext %a) { +; CHECK-LABEL: test_inesi_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @glob, align 4 + ret void +} + +define void @test_inesi_sext_z_store(i32 signext %a) { +; CHECK-LABEL: test_inesi_sext_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %sub = sext i1 %cmp to i32 + store i32 %sub, i32* @glob, align 4 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesinesll.ll b/test/CodeGen/PowerPC/testComparesinesll.ll new file mode 100644 index 000000000000..9e9369455857 --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesinesll.ll @@ -0,0 +1,125 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i64 0, align 8 + +define signext i32 @test_inesll(i64 %a, i64 %b) { +; CHECK-LABEL: test_inesll: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_inesll_sext(i64 %a, i64 %b) { +; CHECK-LABEL: test_inesll_sext: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_inesll_z(i64 %a) { +; CHECK-LABEL: test_inesll_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_inesll_sext_z(i64 %a) { +; CHECK-LABEL: test_inesll_sext_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_inesll_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_inesll_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_inesll_sext_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_inesll_sext_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_inesll_z_store(i64 %a) { +; CHECK-LABEL: test_inesll_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_inesll_sext_z_store(i64 %a) { +; CHECK-LABEL: test_inesll_sext_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesiness.ll b/test/CodeGen/PowerPC/testComparesiness.ll new file mode 100644 index 000000000000..56b7a6ab3974 --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesiness.ll @@ -0,0 +1,121 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i16 0, align 2 + +define signext i32 @test_iness(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: test_iness: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define signext i32 @test_iness_sext(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: test_iness_sext: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_iness_z(i16 signext %a) { +; CHECK-LABEL: test_iness_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +} + +define signext i32 @test_iness_sext_z(i16 signext %a) { +; CHECK-LABEL: test_iness_sext_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_iness_store(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: test_iness_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %conv3 = zext i1 %cmp to i16 + store i16 %conv3, i16* @glob, align 2 + ret void +} + +define void @test_iness_sext_store(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: test_iness_sext_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %conv3 = sext i1 %cmp to i16 + store i16 %conv3, i16* @glob, align 2 + ret void +} + +define void @test_iness_z_store(i16 signext %a) { +; CHECK-LABEL: test_iness_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %conv2 = zext i1 %cmp to i16 + store i16 %conv2, i16* @glob, align 2 + ret void +} + +define void @test_iness_sext_z_store(i16 signext %a) { +; CHECK-LABEL: test_iness_sext_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %conv2 = sext i1 %cmp to i16 + store i16 %conv2, i16* @glob, align 2 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesineuc.ll b/test/CodeGen/PowerPC/testComparesineuc.ll new file mode 100644 index 000000000000..1cba13f12292 --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesineuc.ll @@ -0,0 +1,136 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +@glob = common local_unnamed_addr global i8 0, align 1 + +define signext i32 @test_ineuc(i8 zeroext %a, i8 zeroext %b) { +; CHECK-LABEL: test_ineuc: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define signext i32 @test_ineuc_sext(i8 zeroext %a, i8 zeroext %b) { +; CHECK-LABEL: test_ineuc_sext: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_ineuc_z(i8 zeroext %a) { +; CHECK-LABEL: test_ineuc_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +} + +define signext i32 @test_ineuc_sext_z(i8 zeroext %a) { +; CHECK-LABEL: test_ineuc_sext_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_ineuc_store(i8 zeroext %a, i8 zeroext %b) { +; CHECK-LABEL: test_ineuc_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r5) +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %conv3 = zext i1 %cmp to i8 + store i8 %conv3, i8* @glob, align 1 + ret void +} + +define void @test_ineuc_sext_store(i8 zeroext %a, i8 zeroext %b) { +; CHECK-LABEL: test_ineuc_sext_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r5) +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, %b + %conv3 = sext i1 %cmp to i8 + store i8 %conv3, i8* @glob, align 1 + ret void +} + +define void @test_ineuc_z_store(i8 zeroext %a) { +; CHECK-LABEL: test_ineuc_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %conv2 = zext i1 %cmp to i8 + store i8 %conv2, i8* @glob, align 1 + ret void +} + +define void @test_ineuc_sext_z_store(i8 zeroext %a) { +; CHECK-LABEL: test_ineuc_sext_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: stb r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i8 %a, 0 + %conv2 = sext i1 %cmp to i8 + store i8 %conv2, i8* @glob, align 1 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesineui.ll b/test/CodeGen/PowerPC/testComparesineui.ll new file mode 100644 index 000000000000..36899b7ea8e1 --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesineui.ll @@ -0,0 +1,121 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i32 0, align 4 + +define signext i32 @test_ineui(i32 zeroext %a, i32 zeroext %b) { +; CHECK-LABEL: test_ineui: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_ineui_sext(i32 zeroext %a, i32 zeroext %b) { +; CHECK-LABEL: test_ineui_sext: +; CHECK: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_ineui_z(i32 zeroext %a) { +; CHECK-LABEL: test_ineui_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_ineui_sext_z(i32 zeroext %a) { +; CHECK-LABEL: test_ineui_sext_z: +; CHECK: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_ineui_store(i32 zeroext %a, i32 zeroext %b) { +; CHECK-LABEL: test_ineui_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @glob, align 4 + ret void +} + +define void @test_ineui_sext_store(i32 zeroext %a, i32 zeroext %b) { +; CHECK-LABEL: test_ineui_sext_store: +; CHECK: xor r3, r3, r4 +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, %b + %sub = sext i1 %cmp to i32 + store i32 %sub, i32* @glob, align 4 + ret void +} + +define void @test_ineui_z_store(i32 zeroext %a) { +; CHECK-LABEL: test_ineui_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %conv = zext i1 %cmp to i32 + store i32 %conv, i32* @glob, align 4 + ret void +} + +define void @test_ineui_sext_z_store(i32 zeroext %a) { +; CHECK-LABEL: test_ineui_sext_z_store: +; CHECK: cntlzw r3, r3 +; CHECK: srwi r3, r3, 5 +; CHECK: xori r3, r3, 1 +; CHECK: neg r3, r3 +; CHECK: stw r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i32 %a, 0 + %sub = sext i1 %cmp to i32 + store i32 %sub, i32* @glob, align 4 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesineull.ll b/test/CodeGen/PowerPC/testComparesineull.ll new file mode 100644 index 000000000000..7f0fed15157c --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesineull.ll @@ -0,0 +1,125 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i64 0, align 8 + +define signext i32 @test_ineull(i64 %a, i64 %b) { +; CHECK-LABEL: test_ineull: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_ineull_sext(i64 %a, i64 %b) { +; CHECK-LABEL: test_ineull_sext: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_ineull_z(i64 %a) { +; CHECK-LABEL: test_ineull_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define signext i32 @test_ineull_sext_z(i64 %a) { +; CHECK-LABEL: test_ineull_sext_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_ineull_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_ineull_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_ineull_sext_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_ineull_sext_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_ineull_z_store(i64 %a) { +; CHECK-LABEL: test_ineull_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_ineull_sext_z_store(i64 %a) { +; CHECK-LABEL: test_ineull_sext_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesineus.ll b/test/CodeGen/PowerPC/testComparesineus.ll new file mode 100644 index 000000000000..d24d854f31c9 --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesineus.ll @@ -0,0 +1,137 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i16 0, align 2 + +define signext i32 @test_ineus(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: test_ineus: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define signext i32 @test_ineus_sext(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: test_ineus_sext: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define signext i32 @test_ineus_z(i16 zeroext %a) { +; CHECK-LABEL: test_ineus_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +} + +define signext i32 @test_ineus_sext_z(i16 zeroext %a) { +; CHECK-LABEL: test_ineus_sext_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %sub = sext i1 %cmp to i32 + ret i32 %sub +} + +define void @test_ineus_store(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: test_ineus_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r5) +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %conv3 = zext i1 %cmp to i16 + store i16 %conv3, i16* @glob, align 2 + ret void +} + +define void @test_ineus_sext_store(i16 zeroext %a, i16 zeroext %b) { +; CHECK-LABEL: test_ineus_sext_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r5) +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, %b + %conv3 = sext i1 %cmp to i16 + store i16 %conv3, i16* @glob, align 2 + ret void +} + +define void @test_ineus_z_store(i16 zeroext %a) { +; CHECK-LABEL: test_ineus_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %conv2 = zext i1 %cmp to i16 + store i16 %conv2, i16* @glob, align 2 + ret void +} + +define void @test_ineus_sext_z_store(i16 zeroext %a) { +; CHECK-LABEL: test_ineus_sext_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: cntlzw r3, r3 +; CHECK-NEXT: srwi r3, r3, 5 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: neg r3, r3 +; CHECK-NEXT: sth r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i16 %a, 0 + %conv2 = sext i1 %cmp to i16 + store i16 %conv2, i16* @glob, align 2 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesllnesll.ll b/test/CodeGen/PowerPC/testComparesllnesll.ll new file mode 100644 index 000000000000..d87ff55739fc --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesllnesll.ll @@ -0,0 +1,125 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i64 0, align 8 + +define i64 @test_llnesll(i64 %a, i64 %b) { +; CHECK-LABEL: test_llnesll: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @test_llnesll_sext(i64 %a, i64 %b) { +; CHECK-LABEL: test_llnesll_sext: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = sext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @test_llnesll_z(i64 %a) { +; CHECK-LABEL: test_llnesll_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @test_llnesll_sext_z(i64 %a) { +; CHECK-LABEL: test_llnesll_sext_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = sext i1 %cmp to i64 + ret i64 %conv1 +} + +define void @test_llnesll_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_llnesll_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_llnesll_sext_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_llnesll_sext_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_llnesll_z_store(i64 %a) { +; CHECK-LABEL: test_llnesll_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_llnesll_sext_z_store(i64 %a) { +; CHECK-LABEL: test_llnesll_sext_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} diff --git a/test/CodeGen/PowerPC/testComparesllneull.ll b/test/CodeGen/PowerPC/testComparesllneull.ll new file mode 100644 index 000000000000..7309d5899068 --- /dev/null +++ b/test/CodeGen/PowerPC/testComparesllneull.ll @@ -0,0 +1,125 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \ +; RUN: --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +@glob = common local_unnamed_addr global i64 0, align 8 + +define i64 @test_llneull(i64 %a, i64 %b) { +; CHECK-LABEL: test_llneull: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @test_llneull_sext(i64 %a, i64 %b) { +; CHECK-LABEL: test_llneull_sext: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = sext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @test_llneull_z(i64 %a) { +; CHECK-LABEL: test_llneull_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addic r4, r3, -1 +; CHECK-NEXT: subfe r3, r4, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = zext i1 %cmp to i64 + ret i64 %conv1 +} + +define i64 @test_llneull_sext_z(i64 %a) { +; CHECK-LABEL: test_llneull_sext_z: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = sext i1 %cmp to i64 + ret i64 %conv1 +} + +define void @test_llneull_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_llneull_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_llneull_sext_store(i64 %a, i64 %b) { +; CHECK-LABEL: test_llneull_sext_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha +; CHECK-NEXT: xor r3, r3, r4 +; CHECK-NEXT: ld r12, .LC0@toc@l(r5) +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r12) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, %b + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_llneull_z_store(i64 %a) { +; CHECK-LABEL: test_llneull_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: addic r5, r3, -1 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r5, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = zext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} + +define void @test_llneull_sext_z_store(i64 %a) { +; CHECK-LABEL: test_llneull_sext_z_store: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-NEXT: subfic r3, r3, 0 +; CHECK-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-NEXT: subfe r3, r3, r3 +; CHECK-NEXT: std r3, 0(r4) +; CHECK-NEXT: blr +entry: + %cmp = icmp ne i64 %a, 0 + %conv1 = sext i1 %cmp to i64 + store i64 %conv1, i64* @glob, align 8 + ret void +} diff --git a/test/CodeGen/PowerPC/vec_int_ext.ll b/test/CodeGen/PowerPC/vec_int_ext.ll new file mode 100644 index 000000000000..9e1218c423b7 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_int_ext.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9 +target triple = "powerpc64le-unknown-linux-gnu" + +define <4 x i32> @vextsb2w(<16 x i8> %a) { +; PWR9-LABEL: vextsb2w: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsb2w 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 0 + %conv = sext i8 %vecext to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 4 + %conv2 = sext i8 %vecext1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + %vecext4 = extractelement <16 x i8> %a, i32 8 + %conv5 = sext i8 %vecext4 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %vecext7 = extractelement <16 x i8> %a, i32 12 + %conv8 = sext i8 %vecext7 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +} + +define <2 x i64> @vextsb2d(<16 x i8> %a) { +; PWR9-LABEL: vextsb2d: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsb2d 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 0 + %conv = sext i8 %vecext to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %vecext1 = extractelement <16 x i8> %a, i32 8 + %conv2 = sext i8 %vecext1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +} + +define <4 x i32> @vextsh2w(<8 x i16> %a) { +; PWR9-LABEL: vextsh2w: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsh2w 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sext i16 %vecext to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 2 + %conv2 = sext i16 %vecext1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + %vecext4 = extractelement <8 x i16> %a, i32 4 + %conv5 = sext i16 %vecext4 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %vecext7 = extractelement <8 x i16> %a, i32 6 + %conv8 = sext i16 %vecext7 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +} + +define <2 x i64> @vextsh2d(<8 x i16> %a) { +; PWR9-LABEL: vextsh2d: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsh2d 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 0 + %conv = sext i16 %vecext to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %vecext1 = extractelement <8 x i16> %a, i32 4 + %conv2 = sext i16 %vecext1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +} + +define <2 x i64> @vextsw2d(<4 x i32> %a) { +; PWR9-LABEL: vextsw2d: +; PWR9: # BB#0: # %entry +; PWR9-NEXT: vextsw2d 2, 2 +; PWR9-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = sext i32 %vecext to i64 + %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0 + %vecext1 = extractelement <4 x i32> %a, i32 2 + %conv2 = sext i32 %vecext1 to i64 + %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1 + ret <2 x i64> %vecinit3 +} diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index b1deb2c5f567..e04d10c9d64a 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \ -; RUN: grep "asm-printer" | grep 35 +; RUN: grep "asm-printer" | grep 33 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll deleted file mode 100644 index 6c60aed67a7b..000000000000 --- a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc -mtriple i386 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -; RUN: llc -mtriple x86_64 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s - -define void @test_void_return() { -; CHECK-LABEL: name: test_void_return -; CHECK: alignment: 4 -; CHECK-NEXT: exposesReturnsTwice: false -; CHECK-NEXT: legalized: false -; CHECK-NEXT: regBankSelected: false -; CHECK-NEXT: selected: false -; CHECK-NEXT: tracksRegLiveness: true -; CHECK-NEXT: frameInfo: -; CHECK-NEXT: isFrameAddressTaken: false -; CHECK-NEXT: isReturnAddressTaken: false -; CHECK-NEXT: hasStackMap: false -; CHECK-NEXT: hasPatchPoint: false -; CHECK-NEXT: stackSize: 0 -; CHECK-NEXT: offsetAdjustment: 0 -; CHECK-NEXT: maxAlignment: 0 -; CHECK-NEXT: adjustsStack: false -; CHECK-NEXT: hasCalls: false -; CHECK-NEXT: hasOpaqueSPAdjustment: false -; CHECK-NEXT: hasVAStart: false -; CHECK-NEXT: hasMustTailInVarArgFunc: false -; CHECK-NEXT: body: -; CHECK-NEXT: bb.1.entry: -; CHECK-NEXT: RET 0 -entry: - ret void -} diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll index 8ea3e4f9d739..00aa7cf84e55 100644 --- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=i386-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32 -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse2 -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32 +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 @a1_8bit = external global i8 @a7_8bit = external global i8 @@ -11,8 +11,8 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, ; ALL-LABEL: name: test_i8_args_8 ; X64: fixedStack: -; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false -; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false +; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true, +; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true, ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1:%[0-9]+]](s8) = COPY %edi ; X64-NEXT: %{{[0-9]+}}(s8) = COPY %esi @@ -26,14 +26,14 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, ; X64-NEXT: [[ARG8:%[0-9]+]](s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: -; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 1, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 1, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 1, alignment: 8, isImmutable: true, +; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 1, alignment: 16, isImmutable: true, +; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true, +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s8) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -77,8 +77,8 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, ; ALL-LABEL: name: test_i32_args_8 ; X64: fixedStack: -; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false -; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false +; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true, +; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1:%[0-9]+]](s32) = COPY %edi ; X64-NEXT: %{{[0-9]+}}(s32) = COPY %esi @@ -92,14 +92,14 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, ; X64-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: -; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -142,8 +142,8 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, ; ALL-LABEL: name: test_i64_args_8 ; X64: fixedStack: -; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false -; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false +; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true, +; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true, ; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9 ; X64: [[ARG1:%[0-9]+]](s64) = COPY %rdi ; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rsi @@ -157,22 +157,22 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, ; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: -; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK60:[0-9]+]], type: default, offset: 60, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK56:[0-9]+]], type: default, offset: 56, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK52:[0-9]+]], type: default, offset: 52, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK48:[0-9]+]], type: default, offset: 48, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK44:[0-9]+]], type: default, offset: 44, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK40:[0-9]+]], type: default, offset: 40, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK36:[0-9]+]], type: default, offset: 36, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK32:[0-9]+]], type: default, offset: 32, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X32: [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1L:%[0-9]+]](s32) = G_LOAD [[ARG1L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) @@ -249,8 +249,8 @@ define float @test_float_args(float %arg1, float %arg2) { ; X64-NEXT: RET 0, implicit %xmm0 ; X32: fixedStack: -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -270,8 +270,8 @@ define double @test_double_args(double %arg1, double %arg2) { ; X64-NEXT: RET 0, implicit %xmm0 ; X32: fixedStack: -; X32: id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s64) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -282,6 +282,38 @@ define double @test_double_args(double %arg1, double %arg2) { ret double %arg2 } +define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { +; ALL: name: test_v4i32_args +; ALL: liveins: %xmm0, %xmm1 +; ALL: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0 +; ALL-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1 +; ALL-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>) +; ALL-NEXT: RET 0, implicit %xmm0 + ret <4 x i32> %arg2 +} + +define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) { +; ALL: name: test_v8i32_args +; ALL: liveins: %xmm0, %xmm1 +; ALL: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0 +; ALL-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1 +; ALL-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>) +; ALL-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>) +; ALL-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>) +; ALL-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>) +; ALL-NEXT: RET 0, implicit %xmm0, implicit %xmm1 + + ret <8 x i32> %arg1 +} + +define void @test_void_return() { +; ALL-LABEL: name: test_void_return +; ALL: bb.1.entry: +; ALL-NEXT: RET 0 +entry: + ret void +} + define i32 * @test_memop_i32(i32 * %p1) { ; ALL-LABEL:name: test_memop_i32 ;X64 liveins: %rdi @@ -290,7 +322,7 @@ define i32 * @test_memop_i32(i32 * %p1) { ;X64-NEXT: RET 0, implicit %rax ;X32: fixedStack: -;X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +;X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ;X32: %1(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ;X32-NEXT: %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ;X32-NEXT: %eax = COPY %0(p0) diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll deleted file mode 100644 index 90a05f5fc225..000000000000 --- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 - -define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { -; X64: name: test_v4i32_args -; X64: liveins: %xmm0, %xmm1 -; X64: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0 -; X64-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1 -; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>) -; X64-NEXT: RET 0, implicit %xmm0 - ret <4 x i32> %arg2 -} - -define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) { -; X64: name: test_v8i32_args -; X64: liveins: %xmm0, %xmm1 -; X64: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0 -; X64-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1 -; X64-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>) -; X64-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>) -; X64-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>) -; X64-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>) -; X64-NEXT: RET 0, implicit %xmm0, implicit %xmm1 - - ret <8 x i32> %arg1 -} diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir index 0d66a6384107..682d01e66fa0 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir @@ -24,9 +24,9 @@ alignment: 4 legalized: false regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: _ } -# CHECK-NEXT: - { id: 1, class: _ } -# CHECK-NEXT: - { id: 2, class: _ } +# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,9 +56,9 @@ alignment: 4 legalized: false regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: _ } -# CHECK-NEXT: - { id: 1, class: _ } -# CHECK-NEXT: - { id: 2, class: _ } +# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -88,9 +88,9 @@ alignment: 4 legalized: false regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: _ } -# CHECK-NEXT: - { id: 1, class: _ } -# CHECK-NEXT: - { id: 2, class: _ } +# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir index be62832b008a..effd26e9866d 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir @@ -26,9 +26,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,9 +56,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -86,9 +86,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir index d99303c3ba3b..5ae8132156d5 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir @@ -26,9 +26,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,9 +56,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -86,9 +86,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir index 24eefd30c2ac..71ea313c4c72 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir @@ -28,9 +28,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -58,9 +58,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -88,9 +88,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir index cc03f3a57f0b..ca238b29c2dd 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir @@ -33,8 +33,8 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_mul_vec256 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,8 +56,8 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_vec256 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -79,8 +79,8 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_sub_vec256 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -100,8 +100,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -122,8 +122,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir index 278413ad38ef..c94ecc8e9a8d 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir @@ -33,8 +33,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -53,8 +53,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -73,8 +73,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -93,8 +93,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -115,8 +115,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir index a115d1fa3255..b74e03f0fe79 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir @@ -14,11 +14,11 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir index 1ea922ee475a..7bcc57aef4ac 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir @@ -145,9 +145,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i8 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -172,9 +172,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i16 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -199,9 +199,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -226,9 +226,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i64 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -253,14 +253,14 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_mul_gpr # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } -# CHECK: - { id: 3, class: gpr } -# CHECK: - { id: 4, class: gpr } -# CHECK: - { id: 5, class: gpr } -# CHECK: - { id: 6, class: gpr } -# CHECK: - { id: 7, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } +# CHECK: - { id: 3, class: gpr, preferred-register: '' } +# CHECK: - { id: 4, class: gpr, preferred-register: '' } +# CHECK: - { id: 5, class: gpr, preferred-register: '' } +# CHECK: - { id: 6, class: gpr, preferred-register: '' } +# CHECK: - { id: 7, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -292,9 +292,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_float # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -319,9 +319,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_double # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -346,9 +346,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_v4i32 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -373,9 +373,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_v4f32 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -399,8 +399,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i8 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -422,8 +422,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i16 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -445,8 +445,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -469,8 +469,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i64 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -492,8 +492,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_float # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -515,8 +515,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_double # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -538,8 +538,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_v4i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -561,8 +561,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_store_i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -585,8 +585,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_store_i64 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -610,12 +610,12 @@ selected: false # CHECK-LABEL: name: test_store_float # CHECK: registers: -# FAST-NEXT: - { id: 0, class: vecr } -# FAST-NEXT: - { id: 1, class: gpr } -# FAST-NEXT: - { id: 2, class: gpr } +# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 0, class: vecr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -647,12 +647,12 @@ selected: false # CHECK-LABEL: name: test_store_double # CHECK: registers: -# FAST-NEXT: - { id: 0, class: vecr } -# FAST-NEXT: - { id: 1, class: gpr } -# FAST-NEXT: - { id: 2, class: gpr } +# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 0, class: vecr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -682,10 +682,10 @@ alignment: 4 legalized: true # CHECK-LABEL: name: constInt_check # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -706,10 +706,10 @@ alignment: 4 legalized: true # CHECK-LABEL: name: trunc_check # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -729,11 +729,11 @@ name: test_gep legalized: true # CHECK-LABEL: name: test_gep # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -757,9 +757,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -782,9 +782,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -807,9 +807,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -832,9 +832,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/select-add-v128.mir b/test/CodeGen/X86/GlobalISel/select-add-v128.mir index a39702340bc2..4f7b6ec72d52 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-v128.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-v128.mir @@ -32,19 +32,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -74,19 +74,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -116,19 +116,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -158,19 +158,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-add-v256.mir b/test/CodeGen/X86/GlobalISel/select-add-v256.mir index 7556c2104124..143fd9422974 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-v256.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-v256.mir @@ -30,19 +30,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -70,19 +70,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -110,19 +110,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -150,19 +150,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-add-v512.mir b/test/CodeGen/X86/GlobalISel/select-add-v512.mir index e90be4e996f8..6a0cd32eefd5 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-v512.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-v512.mir @@ -31,9 +31,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -57,9 +57,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -83,9 +83,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -109,9 +109,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/test/CodeGen/X86/GlobalISel/select-add-x32.mir index 8710aaa61a21..0b864f417367 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-x32.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-x32.mir @@ -13,16 +13,16 @@ alignment: 4 legalized: true regBankSelected: true # X32: registers: -# X32-NEXT: - { id: 0, class: gr32 } -# X32-NEXT: - { id: 1, class: gr32 } -# X32-NEXT: - { id: 2, class: gr32 } -# X32-NEXT: - { id: 3, class: gr32 } -# X32-NEXT: - { id: 4, class: gpr } -# X32-NEXT: - { id: 5, class: gr32 } -# X32-NEXT: - { id: 6, class: gr32 } -# X32-NEXT: - { id: 7, class: gr32 } -# X32-NEXT: - { id: 8, class: gr32 } -# X32-NEXT: - { id: 9, class: gpr } +# X32-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# X32-NEXT: - { id: 5, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 6, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 7, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 8, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 9, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir index 7337ce12c395..78e6bb6913a4 100644 --- a/test/CodeGen/X86/GlobalISel/select-add.mir +++ b/test/CodeGen/X86/GlobalISel/select-add.mir @@ -51,9 +51,9 @@ name: test_add_i64 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr64 } -# ALL-NEXT: - { id: 1, class: gr64 } -# ALL-NEXT: - { id: 2, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -78,9 +78,9 @@ name: test_add_i32 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -106,9 +106,9 @@ legalized: true regBankSelected: true selected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr16 } -# ALL-NEXT: - { id: 2, class: gr16 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -135,9 +135,9 @@ legalized: true regBankSelected: true selected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr8 } -# ALL-NEXT: - { id: 2, class: gr8 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -165,12 +165,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr32 } -# NO_AVX512F-NEXT: - { id: 1, class: fr32 } -# NO_AVX512F-NEXT: - { id: 2, class: fr32 } -# AVX512ALL-NEXT: - { id: 0, class: fr32x } -# AVX512ALL-NEXT: - { id: 1, class: fr32x } -# AVX512ALL-NEXT: - { id: 2, class: fr32x } +# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -200,12 +200,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr64 } -# NO_AVX512F-NEXT: - { id: 1, class: fr64 } -# NO_AVX512F-NEXT: - { id: 2, class: fr64 } -# AVX512ALL-NEXT: - { id: 0, class: fr64x } -# AVX512ALL-NEXT: - { id: 1, class: fr64x } -# AVX512ALL-NEXT: - { id: 2, class: fr64x } +# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -235,12 +235,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -271,12 +271,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-cmp.mir b/test/CodeGen/X86/GlobalISel/select-cmp.mir index a92c388c1db9..64c8cb6b823a 100644 --- a/test/CodeGen/X86/GlobalISel/select-cmp.mir +++ b/test/CodeGen/X86/GlobalISel/select-cmp.mir @@ -87,11 +87,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr8 } -# CHECK-NEXT: - { id: 1, class: gr8 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -124,11 +124,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr16 } -# CHECK-NEXT: - { id: 1, class: gr16 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -161,11 +161,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr64 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -198,11 +198,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -235,11 +235,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -272,11 +272,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -309,11 +309,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -346,11 +346,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -383,11 +383,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -420,11 +420,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -457,11 +457,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -494,11 +494,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -531,11 +531,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-constant.mir b/test/CodeGen/X86/GlobalISel/select-constant.mir index 162de0264435..7902a5084ce6 100644 --- a/test/CodeGen/X86/GlobalISel/select-constant.mir +++ b/test/CodeGen/X86/GlobalISel/select-constant.mir @@ -33,7 +33,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i8 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -52,7 +52,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i16 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr16 } +# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -71,7 +71,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -90,7 +90,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i64 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -110,7 +110,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i64_u32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -129,7 +129,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i64_i32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: diff --git a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir index d1a3abfd0f93..edb467b2bf90 100644 --- a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir +++ b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir @@ -25,10 +25,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr8 } -# ALL-NEXT: - { id: 2, class: gr64 } -# ALL-NEXT: - { id: 3, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -57,8 +57,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -83,8 +83,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-ext.mir b/test/CodeGen/X86/GlobalISel/select-ext.mir index dccc20e57100..b52f1f6fa621 100644 --- a/test/CodeGen/X86/GlobalISel/select-ext.mir +++ b/test/CodeGen/X86/GlobalISel/select-ext.mir @@ -35,9 +35,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -63,8 +63,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -89,8 +89,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -115,8 +115,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -141,8 +141,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-gep.mir b/test/CodeGen/X86/GlobalISel/select-gep.mir index c8a4dc80cb2c..61c766230035 100644 --- a/test/CodeGen/X86/GlobalISel/select-gep.mir +++ b/test/CodeGen/X86/GlobalISel/select-gep.mir @@ -14,9 +14,9 @@ regBankSelected: true selected: false # CHECK-LABEL: name: test_gep_i32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr64_nosp } -# CHECK-NEXT: - { id: 2, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr64_nosp, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-inc.mir b/test/CodeGen/X86/GlobalISel/select-inc.mir index 7a77864091d3..47fe6ef672ba 100644 --- a/test/CodeGen/X86/GlobalISel/select-inc.mir +++ b/test/CodeGen/X86/GlobalISel/select-inc.mir @@ -13,10 +13,10 @@ name: test_add_i8 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# INC-NEXT: - { id: 1, class: gpr } -# ADD-NEXT: - { id: 1, class: gr8 } -# ALL-NEXT: - { id: 2, class: gr8 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# INC-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# ADD-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir index 539520c0b8f5..9128f19b1d24 100644 --- a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir +++ b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir @@ -29,7 +29,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -47,7 +47,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1_optsize # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -65,7 +65,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1b # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -83,7 +83,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1_optsizeb # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir index 8e6a2771db6e..09f414b48a8a 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir @@ -49,9 +49,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr8 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -79,9 +79,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr16 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -109,9 +109,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -139,10 +139,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -176,10 +176,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -213,10 +213,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -250,9 +250,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -280,10 +280,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir index b57c9b0cca98..6d03d7525d20 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir @@ -91,8 +91,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr8 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr8, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -115,8 +115,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr16 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr16, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -139,8 +139,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr32 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr32, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -163,8 +163,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -187,8 +187,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr32 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr32, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -211,9 +211,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: fr32 } -# AVX512ALL: - { id: 1, class: fr32x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: fr32, preferred-register: '' } +# AVX512ALL: - { id: 1, class: fr32x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -238,8 +238,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -262,9 +262,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: fr64 } -# AVX512ALL: - { id: 1, class: fr64x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: fr64, preferred-register: '' } +# AVX512ALL: - { id: 1, class: fr64x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -289,8 +289,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr32 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr32, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %edi @@ -315,8 +315,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -341,9 +341,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: fr32x } -# ALL: - { id: 1, class: gr64 } -# ALL: - { id: 2, class: gr32 } +# ALL: - { id: 0, class: fr32x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } +# ALL: - { id: 2, class: gr32, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } - { id: 2, class: gpr } @@ -371,9 +371,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: fr32 } -# AVX512ALL: - { id: 0, class: fr32x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: fr32, preferred-register: '' } +# AVX512ALL: - { id: 0, class: fr32x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 @@ -400,9 +400,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: fr64x } -# ALL: - { id: 1, class: gr64 } -# ALL: - { id: 2, class: gr64 } +# ALL: - { id: 0, class: fr64x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } +# ALL: - { id: 2, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } - { id: 2, class: gpr } @@ -430,9 +430,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: fr64 } -# AVX512ALL: - { id: 0, class: fr64x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: fr64, preferred-register: '' } +# AVX512ALL: - { id: 0, class: fr64x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 @@ -460,8 +460,8 @@ legalized: true regBankSelected: true selected: false registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.ptr1) @@ -483,8 +483,8 @@ legalized: true regBankSelected: true selected: false registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: MOV64mr %0, 1, _, 0, _, %1 :: (store 8 into %ir.ptr1) diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir index ce3f6b91dcf6..08844657e2a2 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir @@ -32,9 +32,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: vr128 } -# AVX512ALL: - { id: 1, class: vr128x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -60,9 +60,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: vr128 } -# AVX512ALL: - { id: 1, class: vr128x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -88,9 +88,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: vr128 } -# AVX512ALL: - { id: 0, class: vr128x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 @@ -118,9 +118,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: vr128 } -# AVX512ALL: - { id: 0, class: vr128x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir index b9a7e4a8cc4a..ff371ad9989f 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir @@ -33,12 +33,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: gr64 } -# NO_AVX512F-NEXT: - { id: 1, class: vr256 } +# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: gr64 } -# AVX512ALL-NEXT: - { id: 1, class: vr256x } +# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -73,12 +73,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: gr64 } -# NO_AVX512F-NEXT: - { id: 1, class: vr256 } +# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: gr64 } -# AVX512ALL-NEXT: - { id: 1, class: vr256x } +# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -113,12 +113,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: vr256 } -# NO_AVX512F-NEXT: - { id: 1, class: gr64 } +# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: vr256x } -# AVX512ALL-NEXT: - { id: 1, class: gr64 } +# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } @@ -153,12 +153,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: vr256 } -# NO_AVX512F-NEXT: - { id: 1, class: gr64 } +# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: vr256x } -# AVX512ALL-NEXT: - { id: 1, class: gr64 } +# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir index 87978a684d4c..131902d81a00 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir @@ -28,8 +28,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: gr64 } -# AVX512F-NEXT: - { id: 1, class: vr512 } +# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -54,8 +54,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: gr64 } -# AVX512F-NEXT: - { id: 1, class: vr512 } +# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -80,8 +80,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: vr512 } -# AVX512F-NEXT: - { id: 1, class: gr64 } +# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } @@ -106,8 +106,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: vr512 } -# AVX512F-NEXT: - { id: 1, class: gr64 } +# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir index 34a77acc2d1e..453557c08469 100644 --- a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir +++ b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir @@ -24,9 +24,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr16 } -# ALL-NEXT: - { id: 2, class: gr16 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -55,9 +55,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -86,9 +86,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr64 } -# ALL-NEXT: - { id: 1, class: gr64 } -# ALL-NEXT: - { id: 2, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir index 5f8ab1e4f189..d3651ccd1ab9 100644 --- a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir +++ b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir @@ -95,9 +95,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -121,9 +121,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -147,9 +147,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128x } -# CHECK-NEXT: - { id: 1, class: vr128x } -# CHECK-NEXT: - { id: 2, class: vr128x } +# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -173,9 +173,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -199,9 +199,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -225,9 +225,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128x } -# CHECK-NEXT: - { id: 1, class: vr128x } -# CHECK-NEXT: - { id: 2, class: vr128x } +# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -251,9 +251,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128x } -# CHECK-NEXT: - { id: 1, class: vr128x } -# CHECK-NEXT: - { id: 2, class: vr128x } +# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -277,9 +277,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256 } -# CHECK-NEXT: - { id: 1, class: vr256 } -# CHECK-NEXT: - { id: 2, class: vr256 } +# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -303,9 +303,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256x } -# CHECK-NEXT: - { id: 1, class: vr256x } -# CHECK-NEXT: - { id: 2, class: vr256x } +# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -329,9 +329,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256 } -# CHECK-NEXT: - { id: 1, class: vr256 } -# CHECK-NEXT: - { id: 2, class: vr256 } +# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -355,9 +355,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256x } -# CHECK-NEXT: - { id: 1, class: vr256x } -# CHECK-NEXT: - { id: 2, class: vr256x } +# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -381,9 +381,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256x } -# CHECK-NEXT: - { id: 1, class: vr256x } -# CHECK-NEXT: - { id: 2, class: vr256x } +# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -407,9 +407,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr512 } -# CHECK-NEXT: - { id: 1, class: vr512 } -# CHECK-NEXT: - { id: 2, class: vr512 } +# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -433,9 +433,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr512 } -# CHECK-NEXT: - { id: 1, class: vr512 } -# CHECK-NEXT: - { id: 2, class: vr512 } +# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -459,9 +459,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr512 } -# CHECK-NEXT: - { id: 1, class: vr512 } -# CHECK-NEXT: - { id: 2, class: vr512 } +# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir index d60d4155e29d..f77879d93009 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir @@ -32,19 +32,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -74,19 +74,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -116,19 +116,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -158,19 +158,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir index fbc44997b4a2..d6bde7fbb691 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir @@ -30,19 +30,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -70,19 +70,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -110,19 +110,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -150,19 +150,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir index dcd05f056949..828a243b2656 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir @@ -31,9 +31,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -57,9 +57,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -83,9 +83,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -109,9 +109,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub.mir b/test/CodeGen/X86/GlobalISel/select-sub.mir index d4db6eec6d80..4768a2d93222 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub.mir @@ -40,9 +40,9 @@ name: test_sub_i64 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr64 } -# ALL-NEXT: - { id: 1, class: gr64 } -# ALL-NEXT: - { id: 2, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -66,9 +66,9 @@ name: test_sub_i32 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -94,12 +94,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr32 } -# NO_AVX512F-NEXT: - { id: 1, class: fr32 } -# NO_AVX512F-NEXT: - { id: 2, class: fr32 } -# AVX512ALL-NEXT: - { id: 0, class: fr32x } -# AVX512ALL-NEXT: - { id: 1, class: fr32x } -# AVX512ALL-NEXT: - { id: 2, class: fr32x } +# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -128,12 +128,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr64 } -# NO_AVX512F-NEXT: - { id: 1, class: fr64 } -# NO_AVX512F-NEXT: - { id: 2, class: fr64 } -# AVX512ALL-NEXT: - { id: 0, class: fr64x } -# AVX512ALL-NEXT: - { id: 1, class: fr64x } -# AVX512ALL-NEXT: - { id: 2, class: fr64x } +# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -161,12 +161,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -196,12 +196,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-trunc.mir b/test/CodeGen/X86/GlobalISel/select-trunc.mir index 9b90543d6559..4df585628ddc 100644 --- a/test/CodeGen/X86/GlobalISel/select-trunc.mir +++ b/test/CodeGen/X86/GlobalISel/select-trunc.mir @@ -38,8 +38,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -64,8 +64,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -90,8 +90,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr16 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -116,8 +116,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit } -# CHECK-NEXT: - { id: 1, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -142,8 +142,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr16 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -168,8 +168,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll index 874e3e379d8e..5e375cc42e01 100644 --- a/test/CodeGen/X86/O0-pipeline.ll +++ b/test/CodeGen/X86/O0-pipeline.ll @@ -5,12 +5,12 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: Target Pass Configuration +; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Create Garbage Collector Module Metadata -; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Pre-ISel Intrinsic Lowering diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll index ed2df277480e..e79d2e69e347 100644 --- a/test/CodeGen/X86/atom-fixup-lea3.ll +++ b/test/CodeGen/X86/atom-fixup-lea3.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s -; CHECK: addl ([[reg:%[a-z]+]]) -; CHECK-NEXT: addl $4, [[reg]] +; CHECK: addl ({{%[a-z]+}},[[reg:%[a-z]+]],4) +; CHECK-NEXT: movl +; CHECK-NEXT: addl 4({{%[a-z]+}},[[reg:%[a-z]+]],4) +; CHECK-NEXT: incl ; Test for the FixupLEAs pre-emit pass. ; An LEA should NOT be substituted for the ADD instruction @@ -20,7 +22,7 @@ ; return sum; ;} -define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 { +define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %k, i32* nocapture %l, i32* nocapture %m, i32* nocapture %array2) #0 { entry: %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.lr.ph, label %for.end @@ -35,6 +37,9 @@ for.body: ; preds = %for.body, %for.body %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ] %inc1 = add nsw i32 %j.09, 1 %arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09 + store i32 %0, i32* %m, align 4 + store i32 %sum.010, i32* %m, align 4 + store i32 %0, i32* %m, align 4 %1 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %1 store i32 %add, i32* %m, align 4 diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll index bb05481e313d..47e95fe31bdf 100644 --- a/test/CodeGen/X86/avx-schedule.ll +++ b/test/CodeGen/X86/avx-schedule.ll @@ -910,14 +910,14 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddpd: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) %2 = load <4 x double>, <4 x double> *%a2, align 32 @@ -941,14 +941,14 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddps: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) %2 = load <8 x float>, <8 x float> *%a2, align 32 @@ -972,14 +972,14 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubpd: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) %2 = load <4 x double>, <4 x double> *%a2, align 32 @@ -1003,14 +1003,14 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubps: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) %2 = load <8 x float>, <8 x float> *%a2, align 32 diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index 1914b5134bee..91d1f64c6706 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcA: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -14,7 +14,7 @@ entry: define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcB: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 @@ -26,7 +26,7 @@ entry: define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcC: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vmovq %rdi, %xmm0 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 @@ -41,7 +41,7 @@ entry: define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcD: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -58,20 +58,20 @@ entry: ; define <8 x float> @funcE() nounwind { ; CHECK-LABEL: funcE: -; CHECK: ## BB#0: ## %for_exit499 +; CHECK: # BB#0: # %for_exit499 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ## implicit-def: %YMM0 +; CHECK-NEXT: # implicit-def: %YMM0 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB4_2 -; CHECK-NEXT: ## BB#1: ## %load.i1247 +; CHECK-NEXT: jne .LBB4_2 +; CHECK-NEXT: # BB#1: # %load.i1247 ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: andq $-32, %rsp -; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520 +; CHECK-NEXT: subq $1312, %rsp # imm = 0x520 ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp -; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249 +; CHECK-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249 ; CHECK-NEXT: retq allocas: %udx495 = alloca [18 x [18 x float]], align 32 @@ -99,7 +99,7 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex define <8 x float> @funcF(i32 %val) nounwind { ; CHECK-LABEL: funcF: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vmovd %edi, %xmm0 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 @@ -112,7 +112,7 @@ define <8 x float> @funcF(i32 %val) nounwind { define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcG: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -123,7 +123,7 @@ entry: define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcH: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5] ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; CHECK-NEXT: retq @@ -134,7 +134,7 @@ entry: define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) { ; CHECK-LABEL: splat_load_2f64_11: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] ; CHECK-NEXT: retq %x = load <2 x double>, <2 x double>* %ptr @@ -144,7 +144,7 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) { define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) { ; CHECK-LABEL: splat_load_4f64_2222: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0 ; CHECK-NEXT: retq %x = load <4 x double>, <4 x double>* %ptr @@ -154,7 +154,7 @@ define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) { define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) { ; CHECK-LABEL: splat_load_4f32_0000: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 ; CHECK-NEXT: retq %x = load <4 x float>, <4 x float>* %ptr @@ -164,7 +164,7 @@ define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) { define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) { ; CHECK-LABEL: splat_load_8f32_77777777: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0 ; CHECK-NEXT: retq %x = load <8 x float>, <8 x float>* %ptr diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 8f6afa8785d0..140299f5495d 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1549,8 +1549,6 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { ; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vpextrb $8, %xmm0, %eax ; NOVL-NEXT: andl $1, %eax @@ -1579,8 +1577,6 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) { ; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; NOVL-NEXT: retq diff --git a/test/CodeGen/X86/build-vector-128.ll b/test/CodeGen/X86/build-vector-128.ll index 8c3a6790ffa6..c73d7654045e 100644 --- a/test/CodeGen/X86/build-vector-128.ll +++ b/test/CodeGen/X86/build-vector-128.ll @@ -41,9 +41,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa ; ; SSE2-64-LABEL: test_buildvector_v4f32: ; SSE2-64: # BB#0: -; SSE2-64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-64-NEXT: retq ; ; SSE41-64-LABEL: test_buildvector_v4f32: @@ -74,13 +74,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) { ; SSE2-32-LABEL: test_buildvector_v2i64: ; SSE2-32: # BB#0: -; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; SSE2-32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-32-NEXT: retl ; ; SSE-64-LABEL: test_buildvector_v2i64: @@ -126,12 +122,12 @@ define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) { ; SSE2-64-LABEL: test_buildvector_v4i32: ; SSE2-64: # BB#0: ; SSE2-64-NEXT: movd %ecx, %xmm0 -; SSE2-64-NEXT: movd %esi, %xmm1 +; SSE2-64-NEXT: movd %edx, %xmm1 ; SSE2-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-64-NEXT: movd %edx, %xmm2 +; SSE2-64-NEXT: movd %esi, %xmm2 ; SSE2-64-NEXT: movd %edi, %xmm0 ; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-64-NEXT: retq ; ; SSE41-64-LABEL: test_buildvector_v4i32: @@ -170,34 +166,34 @@ define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-32-NEXT: retl ; ; SSE2-64-LABEL: test_buildvector_v8i16: ; SSE2-64: # BB#0: -; SSE2-64-NEXT: movd %ecx, %xmm0 -; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-64-NEXT: movd %r9d, %xmm1 -; SSE2-64-NEXT: movd %esi, %xmm2 -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-64-NEXT: movd %edx, %xmm1 ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-64-NEXT: movd %r8d, %xmm3 +; SSE2-64-NEXT: movd %r9d, %xmm0 +; SSE2-64-NEXT: movd %r8d, %xmm2 +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-64-NEXT: movd %ecx, %xmm0 +; SSE2-64-NEXT: movd %edx, %xmm1 +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-64-NEXT: movd %esi, %xmm3 ; SSE2-64-NEXT: movd %edi, %xmm0 ; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-64-NEXT: retq ; ; SSE41-32-LABEL: test_buildvector_v8i16: @@ -267,31 +263,31 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-32-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE2-32-NEXT: retl ; ; SSE2-64-LABEL: test_buildvector_v16i8: @@ -299,34 +295,34 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-64-NEXT: movd %ecx, %xmm0 +; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-64-NEXT: movd %r9d, %xmm1 -; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-64-NEXT: movd %esi, %xmm2 -; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-64-NEXT: movd %edx, %xmm3 ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-64-NEXT: movd %r8d, %xmm1 +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-64-NEXT: movd %r9d, %xmm0 +; SSE2-64-NEXT: movd %r8d, %xmm2 +; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-64-NEXT: movd %ecx, %xmm0 +; SSE2-64-NEXT: movd %edx, %xmm1 +; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-64-NEXT: movd %esi, %xmm4 ; SSE2-64-NEXT: movd %edi, %xmm0 -; SSE2-64-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE2-64-NEXT: retq ; ; SSE41-32-LABEL: test_buildvector_v16i8: diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll index 730376acdc93..cd5abc1373b9 100644 --- a/test/CodeGen/X86/buildvec-insertvec.ll +++ b/test/CodeGen/X86/buildvec-insertvec.ll @@ -75,9 +75,9 @@ entry: define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) { ; SSE2-LABEL: test_buildvector_v4f32_register: ; SSE2: # BB#0: -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_register: @@ -102,7 +102,7 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* % ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_load: @@ -126,10 +126,10 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* % define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) { ; SSE2-LABEL: test_buildvector_v4f32_partial_load: ; SSE2: # BB#0: -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_partial_load: @@ -150,12 +150,12 @@ define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32 ; SSE2-LABEL: test_buildvector_v4i32_register: ; SSE2: # BB#0: ; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: movd %esi, %xmm1 +; SSE2-NEXT: movd %edx, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movd %edx, %xmm2 +; SSE2-NEXT: movd %esi, %xmm2 ; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4i32_register: @@ -178,7 +178,7 @@ define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) { ; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: movd %esi, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4i32_partial: @@ -228,21 +228,21 @@ define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 % define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) { ; SSE2-LABEL: test_buildvector_v8i16_register: ; SSE2: # BB#0: -; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: movd %r9d, %xmm1 -; SSE2-NEXT: movd %esi, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: movd %edx, %xmm1 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: movd %r8d, %xmm3 +; SSE2-NEXT: movd %r9d, %xmm0 +; SSE2-NEXT: movd %r8d, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movd %esi, %xmm3 ; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v8i16_register: @@ -333,34 +333,34 @@ define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: movd %r9d, %xmm1 -; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-NEXT: movd %esi, %xmm2 -; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %edx, %xmm3 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: movd %r8d, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: movd %r9d, %xmm0 +; SSE2-NEXT: movd %r8d, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: movd %esi, %xmm4 ; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v16i8_register: diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll index 1218b68b1be4..f6d816ec8919 100644 --- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -159,28 +159,7 @@ define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind { define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind { ; SSE-LABEL: _clearupper8xi16a: ; SSE: # BB#0: -; SSE-NEXT: pextrw $1, %xmm0, %eax -; SSE-NEXT: pextrw $2, %xmm0, %r9d -; SSE-NEXT: pextrw $3, %xmm0, %edx -; SSE-NEXT: pextrw $4, %xmm0, %r8d -; SSE-NEXT: pextrw $5, %xmm0, %edi -; SSE-NEXT: pextrw $6, %xmm0, %esi -; SSE-NEXT: pextrw $7, %xmm0, %ecx -; SSE-NEXT: movd %ecx, %xmm1 -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE-NEXT: movd %edi, %xmm1 -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE-NEXT: movd %esi, %xmm1 -; SSE-NEXT: movd %r9d, %xmm2 -; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE-NEXT: movd %r8d, %xmm1 -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: _clearupper8xi16a: @@ -225,61 +204,9 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind { define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind { ; SSE-LABEL: _clearupper16xi16a: ; SSE: # BB#0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: pushq %r15 -; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %r12 -; SSE-NEXT: pushq %rbx -; SSE-NEXT: pextrw $1, %xmm0, %edi -; SSE-NEXT: pextrw $2, %xmm0, %eax -; SSE-NEXT: pextrw $3, %xmm0, %ecx -; SSE-NEXT: pextrw $4, %xmm0, %edx -; SSE-NEXT: pextrw $5, %xmm0, %esi -; SSE-NEXT: pextrw $6, %xmm0, %ebx -; SSE-NEXT: pextrw $7, %xmm0, %ebp -; SSE-NEXT: pextrw $1, %xmm1, %r10d -; SSE-NEXT: pextrw $2, %xmm1, %r9d -; SSE-NEXT: pextrw $3, %xmm1, %r14d -; SSE-NEXT: pextrw $4, %xmm1, %r8d -; SSE-NEXT: pextrw $5, %xmm1, %r15d -; SSE-NEXT: pextrw $6, %xmm1, %r11d -; SSE-NEXT: pextrw $7, %xmm1, %r12d -; SSE-NEXT: movd %ebp, %xmm2 -; SSE-NEXT: movd %ecx, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE-NEXT: movd %esi, %xmm2 -; SSE-NEXT: movd %edi, %xmm4 -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE-NEXT: movd %ebx, %xmm2 -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: movd %r12d, %xmm3 -; SSE-NEXT: movd %r14d, %xmm4 -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE-NEXT: movd %r15d, %xmm3 -; SSE-NEXT: movd %r10d, %xmm5 -; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] -; SSE-NEXT: movd %r11d, %xmm3 -; SSE-NEXT: movd %r9d, %xmm4 -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE-NEXT: movd %r8d, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r12 -; SSE-NEXT: popq %r14 -; SSE-NEXT: popq %r15 -; SSE-NEXT: popq %rbp +; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andps %xmm2, %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: _clearupper16xi16a: @@ -364,10 +291,9 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -375,31 +301,32 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm1 +; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm2 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm4 -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; @@ -486,10 +413,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -497,31 +423,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm1 +; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm2 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm4 -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE-NEXT: pand %xmm2, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -531,10 +458,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm4 +; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -542,31 +468,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm3 +; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm1 +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm4 ; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm4 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm5 -; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm4 +; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm6 -; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] +; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE-NEXT: pand %xmm2, %xmm1 ; SSE-NEXT: retq ; diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll index 4140721bd5f3..33d001cdc216 100644 --- a/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xfloat: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xfloat: @@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt4xdouble: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovapd (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xdouble: @@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt32xi8: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi8: @@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xi16: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi16: @@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xi32: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi32: @@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt4xi64: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xi64: @@ -957,8 +981,16 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xfloat: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xfloat: @@ -1003,8 +1035,16 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xdouble: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovapd (%rdi), %ymm0 -; AVX1-NEXT: vmovapd 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xdouble: @@ -1049,8 +1089,16 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt64xi8: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt64xi8: @@ -1101,8 +1149,16 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt32xi16: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi16: @@ -1153,8 +1209,16 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xi32: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi32: @@ -1199,8 +1263,16 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xi64: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi64: diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 85b2b41fa191..068480873c23 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -1,16 +1,10 @@ ; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s -; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s +; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck %s define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { -; ATOM: foo -; ATOM: addl -; ATOM: addl -; ATOM: leal ; CHECK: foo -; CHECK: addl -; CHECK: addl -; CHECK: addl +; CHECK: incl entry: %0 = icmp sgt i32 %N, 0 ; [#uses=1] diff --git a/test/CodeGen/X86/haddsub-2.ll b/test/CodeGen/X86/haddsub-2.ll index 4596b83f7bc2..fd023d018031 100644 --- a/test/CodeGen/X86/haddsub-2.ll +++ b/test/CodeGen/X86/haddsub-2.ll @@ -142,12 +142,12 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %edi ; SSE3-NEXT: addl %eax, %edi ; SSE3-NEXT: movd %edi, %xmm0 -; SSE3-NEXT: movd %edx, %xmm1 +; SSE3-NEXT: movd %esi, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %esi, %xmm2 +; SSE3-NEXT: movd %edx, %xmm2 ; SSE3-NEXT: movd %ecx, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phadd_d_test1: @@ -196,16 +196,16 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %esi ; SSE3-NEXT: addl %eax, %esi ; SSE3-NEXT: movd %esi, %xmm0 +; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3] +; SSE3-NEXT: movd %xmm2, %eax +; SSE3-NEXT: movd %xmm1, %esi +; SSE3-NEXT: addl %eax, %esi +; SSE3-NEXT: movd %esi, %xmm1 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: movd %ecx, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %eax -; SSE3-NEXT: movd %xmm1, %ecx -; SSE3-NEXT: addl %eax, %ecx -; SSE3-NEXT: movd %ecx, %xmm1 ; SSE3-NEXT: movd %edx, %xmm0 -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phadd_d_test2: @@ -258,12 +258,12 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %edi ; SSE3-NEXT: subl %edi, %esi ; SSE3-NEXT: movd %esi, %xmm0 -; SSE3-NEXT: movd %ecx, %xmm1 +; SSE3-NEXT: movd %edx, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %edx, %xmm2 +; SSE3-NEXT: movd %ecx, %xmm2 ; SSE3-NEXT: movd %eax, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phsub_d_test1: @@ -312,16 +312,16 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %esi ; SSE3-NEXT: subl %esi, %edx ; SSE3-NEXT: movd %edx, %xmm0 +; SSE3-NEXT: movd %xmm1, %edx +; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE3-NEXT: movd %xmm1, %esi +; SSE3-NEXT: subl %esi, %edx +; SSE3-NEXT: movd %edx, %xmm1 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: movd %eax, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE3-NEXT: movd %xmm1, %eax -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %edx -; SSE3-NEXT: subl %edx, %eax -; SSE3-NEXT: movd %eax, %xmm1 ; SSE3-NEXT: movd %ecx, %xmm0 -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phsub_d_test2: @@ -518,19 +518,19 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) { ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %r9d ; SSE3-NEXT: addl %edx, %r9d -; SSE3-NEXT: movd %xmm1, %esi +; SSE3-NEXT: movd %xmm1, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r10d -; SSE3-NEXT: addl %esi, %r10d -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %esi +; SSE3-NEXT: addl %edx, %esi +; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %edi -; SSE3-NEXT: addl %esi, %edi +; SSE3-NEXT: addl %edx, %edi ; SSE3-NEXT: movd %xmm2, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r11d -; SSE3-NEXT: addl %eax, %r11d +; SSE3-NEXT: movd %xmm0, %r10d +; SSE3-NEXT: addl %eax, %r10d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3] @@ -541,24 +541,24 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: addl %eax, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1] -; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: movd %xmm0, %r11d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3] -; SSE3-NEXT: movd %xmm0, %esi -; SSE3-NEXT: addl %eax, %esi +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: addl %r11d, %eax ; SSE3-NEXT: movd %edi, %xmm0 -; SSE3-NEXT: movd %r9d, %xmm1 +; SSE3-NEXT: movd %esi, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %r10d, %xmm2 +; SSE3-NEXT: movd %r9d, %xmm2 ; SSE3-NEXT: movd %r8d, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE3-NEXT: movd %esi, %xmm1 -; SSE3-NEXT: movd %ecx, %xmm2 +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE3-NEXT: movd %eax, %xmm1 +; SSE3-NEXT: movd %edx, %xmm2 ; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE3-NEXT: movd %edx, %xmm3 -; SSE3-NEXT: movd %r11d, %xmm1 +; SSE3-NEXT: movd %ecx, %xmm3 +; SSE3-NEXT: movd %r10d, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: avx2_vphadd_d_test: @@ -658,83 +658,83 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) { ; SSE3-NEXT: addl %eax, %ecx ; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill ; SSE3-NEXT: pextrw $2, %xmm0, %eax -; SSE3-NEXT: pextrw $3, %xmm0, %r11d -; SSE3-NEXT: addl %eax, %r11d -; SSE3-NEXT: pextrw $4, %xmm0, %eax -; SSE3-NEXT: pextrw $5, %xmm0, %r10d -; SSE3-NEXT: addl %eax, %r10d -; SSE3-NEXT: pextrw $6, %xmm0, %eax -; SSE3-NEXT: pextrw $7, %xmm0, %r13d -; SSE3-NEXT: addl %eax, %r13d -; SSE3-NEXT: movd %xmm1, %eax -; SSE3-NEXT: pextrw $1, %xmm1, %r14d -; SSE3-NEXT: addl %eax, %r14d -; SSE3-NEXT: pextrw $2, %xmm1, %eax -; SSE3-NEXT: pextrw $3, %xmm1, %ebp -; SSE3-NEXT: addl %eax, %ebp -; SSE3-NEXT: pextrw $4, %xmm1, %eax -; SSE3-NEXT: pextrw $5, %xmm1, %ebx -; SSE3-NEXT: addl %eax, %ebx -; SSE3-NEXT: pextrw $6, %xmm1, %eax -; SSE3-NEXT: pextrw $7, %xmm1, %edx -; SSE3-NEXT: addl %eax, %edx -; SSE3-NEXT: movd %xmm2, %eax -; SSE3-NEXT: pextrw $1, %xmm2, %ecx +; SSE3-NEXT: pextrw $3, %xmm0, %ecx ; SSE3-NEXT: addl %eax, %ecx ; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill -; SSE3-NEXT: pextrw $2, %xmm2, %eax -; SSE3-NEXT: pextrw $3, %xmm2, %r12d -; SSE3-NEXT: addl %eax, %r12d -; SSE3-NEXT: pextrw $4, %xmm2, %eax -; SSE3-NEXT: pextrw $5, %xmm2, %r15d +; SSE3-NEXT: pextrw $4, %xmm0, %eax +; SSE3-NEXT: pextrw $5, %xmm0, %r11d +; SSE3-NEXT: addl %eax, %r11d +; SSE3-NEXT: pextrw $6, %xmm0, %eax +; SSE3-NEXT: pextrw $7, %xmm0, %r15d ; SSE3-NEXT: addl %eax, %r15d -; SSE3-NEXT: pextrw $6, %xmm2, %eax -; SSE3-NEXT: pextrw $7, %xmm2, %r8d +; SSE3-NEXT: movd %xmm1, %eax +; SSE3-NEXT: pextrw $1, %xmm1, %r13d +; SSE3-NEXT: addl %eax, %r13d +; SSE3-NEXT: pextrw $2, %xmm1, %eax +; SSE3-NEXT: pextrw $3, %xmm1, %ebx +; SSE3-NEXT: addl %eax, %ebx +; SSE3-NEXT: pextrw $4, %xmm1, %eax +; SSE3-NEXT: pextrw $5, %xmm1, %r8d ; SSE3-NEXT: addl %eax, %r8d -; SSE3-NEXT: movd %xmm3, %eax -; SSE3-NEXT: pextrw $1, %xmm3, %r9d -; SSE3-NEXT: addl %eax, %r9d -; SSE3-NEXT: pextrw $2, %xmm3, %eax -; SSE3-NEXT: pextrw $3, %xmm3, %esi +; SSE3-NEXT: pextrw $6, %xmm1, %eax +; SSE3-NEXT: pextrw $7, %xmm1, %esi ; SSE3-NEXT: addl %eax, %esi -; SSE3-NEXT: pextrw $4, %xmm3, %eax -; SSE3-NEXT: pextrw $5, %xmm3, %edi -; SSE3-NEXT: addl %eax, %edi -; SSE3-NEXT: pextrw $6, %xmm3, %ecx +; SSE3-NEXT: movd %xmm2, %eax +; SSE3-NEXT: pextrw $1, %xmm2, %r10d +; SSE3-NEXT: addl %eax, %r10d +; SSE3-NEXT: pextrw $2, %xmm2, %eax +; SSE3-NEXT: pextrw $3, %xmm2, %r14d +; SSE3-NEXT: addl %eax, %r14d +; SSE3-NEXT: pextrw $4, %xmm2, %eax +; SSE3-NEXT: pextrw $5, %xmm2, %r12d +; SSE3-NEXT: addl %eax, %r12d +; SSE3-NEXT: pextrw $6, %xmm2, %eax +; SSE3-NEXT: pextrw $7, %xmm2, %r9d +; SSE3-NEXT: addl %eax, %r9d +; SSE3-NEXT: movd %xmm3, %eax +; SSE3-NEXT: pextrw $1, %xmm3, %ebp +; SSE3-NEXT: addl %eax, %ebp +; SSE3-NEXT: pextrw $2, %xmm3, %edx +; SSE3-NEXT: pextrw $3, %xmm3, %edi +; SSE3-NEXT: addl %edx, %edi +; SSE3-NEXT: pextrw $4, %xmm3, %edx +; SSE3-NEXT: pextrw $5, %xmm3, %ecx +; SSE3-NEXT: addl %edx, %ecx +; SSE3-NEXT: pextrw $6, %xmm3, %edx ; SSE3-NEXT: pextrw $7, %xmm3, %eax -; SSE3-NEXT: addl %ecx, %eax -; SSE3-NEXT: movd %edx, %xmm8 -; SSE3-NEXT: movd %r13d, %xmm3 -; SSE3-NEXT: movd %ebp, %xmm9 -; SSE3-NEXT: movd %r11d, %xmm4 -; SSE3-NEXT: movd %ebx, %xmm10 -; SSE3-NEXT: movd %r10d, %xmm7 -; SSE3-NEXT: movd %r14d, %xmm11 +; SSE3-NEXT: addl %edx, %eax +; SSE3-NEXT: movd %esi, %xmm8 +; SSE3-NEXT: movd %r8d, %xmm3 +; SSE3-NEXT: movd %ebx, %xmm9 +; SSE3-NEXT: movd %r13d, %xmm4 +; SSE3-NEXT: movd %r15d, %xmm10 +; SSE3-NEXT: movd %r11d, %xmm7 +; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm11 # 4-byte Folded Reload +; SSE3-NEXT: # xmm11 = mem[0],zero,zero,zero ; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload ; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE3-NEXT: movd %eax, %xmm12 -; SSE3-NEXT: movd %r8d, %xmm6 -; SSE3-NEXT: movd %esi, %xmm13 -; SSE3-NEXT: movd %r12d, %xmm5 -; SSE3-NEXT: movd %edi, %xmm14 -; SSE3-NEXT: movd %r15d, %xmm2 -; SSE3-NEXT: movd %r9d, %xmm15 -; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload -; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero +; SSE3-NEXT: movd %ecx, %xmm6 +; SSE3-NEXT: movd %edi, %xmm13 +; SSE3-NEXT: movd %ebp, %xmm5 +; SSE3-NEXT: movd %r9d, %xmm14 +; SSE3-NEXT: movd %r12d, %xmm2 +; SSE3-NEXT: movd %r14d, %xmm15 +; SSE3-NEXT: movd %r10d, %xmm1 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE3-NEXT: popq %rbx ; SSE3-NEXT: popq %r12 ; SSE3-NEXT: popq %r13 @@ -858,12 +858,12 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) { ; SSE-NEXT: movd %xmm0, %edi ; SSE-NEXT: subl %edi, %esi ; SSE-NEXT: movd %esi, %xmm0 -; SSE-NEXT: movd %ecx, %xmm1 +; SSE-NEXT: movd %edx, %xmm1 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: movd %edx, %xmm2 +; SSE-NEXT: movd %ecx, %xmm2 ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: not_a_hsub_1: @@ -919,11 +919,11 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: movaps %xmm1, %xmm4 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: subss %xmm4, %xmm3 -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] -; SSE-NEXT: subss %xmm3, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; SSE-NEXT: subss %xmm4, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: not_a_hsub_2: @@ -1162,19 +1162,19 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) { ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %r9d ; SSE3-NEXT: addl %edx, %r9d -; SSE3-NEXT: movd %xmm2, %esi +; SSE3-NEXT: movd %xmm2, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r10d -; SSE3-NEXT: addl %esi, %r10d -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %esi +; SSE3-NEXT: addl %edx, %esi +; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] +; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %edi -; SSE3-NEXT: addl %esi, %edi +; SSE3-NEXT: addl %edx, %edi ; SSE3-NEXT: movd %xmm1, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r11d -; SSE3-NEXT: addl %eax, %r11d +; SSE3-NEXT: movd %xmm0, %r10d +; SSE3-NEXT: addl %eax, %r10d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] @@ -1185,24 +1185,24 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) { ; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: addl %eax, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1] -; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: movd %xmm0, %r11d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3] -; SSE3-NEXT: movd %xmm0, %esi -; SSE3-NEXT: addl %eax, %esi +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: addl %r11d, %eax ; SSE3-NEXT: movd %edi, %xmm0 -; SSE3-NEXT: movd %r9d, %xmm1 +; SSE3-NEXT: movd %esi, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %r10d, %xmm2 +; SSE3-NEXT: movd %r9d, %xmm2 ; SSE3-NEXT: movd %r8d, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE3-NEXT: movd %esi, %xmm1 -; SSE3-NEXT: movd %ecx, %xmm2 +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE3-NEXT: movd %eax, %xmm1 +; SSE3-NEXT: movd %edx, %xmm2 ; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE3-NEXT: movd %edx, %xmm3 -; SSE3-NEXT: movd %r11d, %xmm1 +; SSE3-NEXT: movd %ecx, %xmm3 +; SSE3-NEXT: movd %r10d, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: avx2_hadd_d: @@ -1293,15 +1293,14 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) { ; SSE3-NEXT: .Lcfi23: ; SSE3-NEXT: .cfi_offset %rbp, -16 ; SSE3-NEXT: movd %xmm0, %eax -; SSE3-NEXT: pextrw $1, %xmm0, %ecx -; SSE3-NEXT: addl %eax, %ecx -; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill +; SSE3-NEXT: pextrw $1, %xmm0, %r10d +; SSE3-NEXT: addl %eax, %r10d ; SSE3-NEXT: pextrw $2, %xmm0, %eax -; SSE3-NEXT: pextrw $3, %xmm0, %r15d -; SSE3-NEXT: addl %eax, %r15d +; SSE3-NEXT: pextrw $3, %xmm0, %r11d +; SSE3-NEXT: addl %eax, %r11d ; SSE3-NEXT: pextrw $4, %xmm0, %eax -; SSE3-NEXT: pextrw $5, %xmm0, %r14d -; SSE3-NEXT: addl %eax, %r14d +; SSE3-NEXT: pextrw $5, %xmm0, %r12d +; SSE3-NEXT: addl %eax, %r12d ; SSE3-NEXT: pextrw $6, %xmm0, %eax ; SSE3-NEXT: pextrw $7, %xmm0, %r13d ; SSE3-NEXT: addl %eax, %r13d @@ -1310,70 +1309,71 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) { ; SSE3-NEXT: addl %eax, %ecx ; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill ; SSE3-NEXT: pextrw $2, %xmm1, %eax -; SSE3-NEXT: pextrw $3, %xmm1, %r11d -; SSE3-NEXT: addl %eax, %r11d -; SSE3-NEXT: pextrw $4, %xmm1, %eax -; SSE3-NEXT: pextrw $5, %xmm1, %r10d -; SSE3-NEXT: addl %eax, %r10d -; SSE3-NEXT: pextrw $6, %xmm1, %eax -; SSE3-NEXT: pextrw $7, %xmm1, %r12d -; SSE3-NEXT: addl %eax, %r12d -; SSE3-NEXT: movd %xmm2, %eax -; SSE3-NEXT: pextrw $1, %xmm2, %ebx -; SSE3-NEXT: addl %eax, %ebx -; SSE3-NEXT: pextrw $2, %xmm2, %eax -; SSE3-NEXT: pextrw $3, %xmm2, %ecx +; SSE3-NEXT: pextrw $3, %xmm1, %ecx ; SSE3-NEXT: addl %eax, %ecx +; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill +; SSE3-NEXT: pextrw $4, %xmm1, %eax +; SSE3-NEXT: pextrw $5, %xmm1, %r14d +; SSE3-NEXT: addl %eax, %r14d +; SSE3-NEXT: pextrw $6, %xmm1, %esi +; SSE3-NEXT: pextrw $7, %xmm1, %r15d +; SSE3-NEXT: addl %esi, %r15d +; SSE3-NEXT: movd %xmm2, %esi +; SSE3-NEXT: pextrw $1, %xmm2, %ebp +; SSE3-NEXT: addl %esi, %ebp +; SSE3-NEXT: pextrw $2, %xmm2, %esi +; SSE3-NEXT: pextrw $3, %xmm2, %edi +; SSE3-NEXT: addl %esi, %edi ; SSE3-NEXT: pextrw $4, %xmm2, %esi -; SSE3-NEXT: pextrw $5, %xmm2, %r8d -; SSE3-NEXT: addl %esi, %r8d -; SSE3-NEXT: pextrw $6, %xmm2, %esi -; SSE3-NEXT: pextrw $7, %xmm2, %edx -; SSE3-NEXT: addl %esi, %edx -; SSE3-NEXT: movd %xmm3, %edi -; SSE3-NEXT: pextrw $1, %xmm3, %r9d -; SSE3-NEXT: addl %edi, %r9d -; SSE3-NEXT: pextrw $2, %xmm3, %ebp -; SSE3-NEXT: pextrw $3, %xmm3, %edi -; SSE3-NEXT: addl %ebp, %edi -; SSE3-NEXT: pextrw $4, %xmm3, %eax -; SSE3-NEXT: pextrw $5, %xmm3, %ebp -; SSE3-NEXT: addl %eax, %ebp -; SSE3-NEXT: pextrw $6, %xmm3, %esi -; SSE3-NEXT: pextrw $7, %xmm3, %eax +; SSE3-NEXT: pextrw $5, %xmm2, %eax ; SSE3-NEXT: addl %esi, %eax -; SSE3-NEXT: movd %edx, %xmm8 -; SSE3-NEXT: movd %r13d, %xmm3 -; SSE3-NEXT: movd %ecx, %xmm9 -; SSE3-NEXT: movd %r15d, %xmm4 -; SSE3-NEXT: movd %r8d, %xmm10 -; SSE3-NEXT: movd %r14d, %xmm7 -; SSE3-NEXT: movd %ebx, %xmm11 -; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload -; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero -; SSE3-NEXT: movd %eax, %xmm12 -; SSE3-NEXT: movd %r12d, %xmm6 -; SSE3-NEXT: movd %edi, %xmm13 -; SSE3-NEXT: movd %r11d, %xmm5 -; SSE3-NEXT: movd %ebp, %xmm14 -; SSE3-NEXT: movd %r10d, %xmm2 -; SSE3-NEXT: movd %r9d, %xmm15 +; SSE3-NEXT: pextrw $6, %xmm2, %esi +; SSE3-NEXT: pextrw $7, %xmm2, %ecx +; SSE3-NEXT: addl %esi, %ecx +; SSE3-NEXT: movd %xmm3, %ebx +; SSE3-NEXT: pextrw $1, %xmm3, %r9d +; SSE3-NEXT: addl %ebx, %r9d +; SSE3-NEXT: pextrw $2, %xmm3, %edx +; SSE3-NEXT: pextrw $3, %xmm3, %ebx +; SSE3-NEXT: addl %edx, %ebx +; SSE3-NEXT: pextrw $4, %xmm3, %edx +; SSE3-NEXT: pextrw $5, %xmm3, %esi +; SSE3-NEXT: addl %edx, %esi +; SSE3-NEXT: pextrw $6, %xmm3, %r8d +; SSE3-NEXT: pextrw $7, %xmm3, %edx +; SSE3-NEXT: addl %r8d, %edx +; SSE3-NEXT: movd %ecx, %xmm8 +; SSE3-NEXT: movd %eax, %xmm3 +; SSE3-NEXT: movd %edi, %xmm9 +; SSE3-NEXT: movd %ebp, %xmm4 +; SSE3-NEXT: movd %r13d, %xmm10 +; SSE3-NEXT: movd %r12d, %xmm7 +; SSE3-NEXT: movd %r11d, %xmm11 +; SSE3-NEXT: movd %r10d, %xmm0 +; SSE3-NEXT: movd %edx, %xmm12 +; SSE3-NEXT: movd %esi, %xmm6 +; SSE3-NEXT: movd %ebx, %xmm13 +; SSE3-NEXT: movd %r9d, %xmm5 +; SSE3-NEXT: movd %r15d, %xmm14 +; SSE3-NEXT: movd %r14d, %xmm2 +; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm15 # 4-byte Folded Reload +; SSE3-NEXT: # xmm15 = mem[0],zero,zero,zero ; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload ; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE3-NEXT: popq %rbx ; SSE3-NEXT: popq %r12 ; SSE3-NEXT: popq %r13 diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll index 6d79d4de5206..091d1a22dbcd 100644 --- a/test/CodeGen/X86/haddsub-undef.ll +++ b/test/CodeGen/X86/haddsub-undef.ll @@ -171,9 +171,8 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: addss %xmm2, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3] -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test8_undef: diff --git a/test/CodeGen/X86/hoist-spill.ll b/test/CodeGen/X86/hoist-spill.ll index afabf96b12a3..03f558fc3ae2 100644 --- a/test/CodeGen/X86/hoist-spill.ll +++ b/test/CodeGen/X86/hoist-spill.ll @@ -3,10 +3,8 @@ ; Check no spills to the same stack slot after hoisting. ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp) ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp) -; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp) ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp) ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp) -; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 786534b00d39..56f4161147b4 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -4,16 +4,19 @@ ; By starting the IV at -64 instead of 0, a cmp is eliminated, ; as the flags from the add can be used directly. -; STATIC: movl $-64, [[ECX:%e..]] +; STATIC: movl $-64, [[EAX:%e..]] -; STATIC: movl [[EAX:%e..]], _state+76([[ECX]]) -; STATIC: addl $16, [[ECX]] +; STATIC: movl %{{.+}}, _state+76([[EAX]]) +; STATIC: addl $16, [[EAX]] ; STATIC: jne -; In PIC mode the symbol can't be folded, so the change-compare-stride -; trick applies. +; The same for PIC mode. -; PIC: cmpl $64 +; PIC: movl $-64, [[EAX:%e..]] + +; PIC: movl %{{.+}}, 76(%{{.+}},[[EAX]]) +; PIC: addl $16, [[EAX]] +; PIC: jne @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] diff --git a/test/CodeGen/X86/madd.ll b/test/CodeGen/X86/madd.ll index af86df510016..7c2bb822c967 100644 --- a/test/CodeGen/X86/madd.ll +++ b/test/CodeGen/X86/madd.ll @@ -9,17 +9,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB0_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqu (%rdi), %xmm2 -; SSE2-NEXT: movdqu (%rsi), %xmm3 +; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2 +; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3 ; SSE2-NEXT: pmaddwd %xmm2, %xmm3 ; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $16, %rsi -; SSE2-NEXT: addq $16, %rdi -; SSE2-NEXT: addq $-8, %rax +; SSE2-NEXT: addq $8, %rcx +; SSE2-NEXT: cmpq %rcx, %rax ; SSE2-NEXT: jne .LBB0_1 ; SSE2-NEXT: # BB#2: # %middle.block ; SSE2-NEXT: paddd %xmm0, %xmm1 @@ -34,17 +34,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly ; AVX2: # BB#0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB0_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX2-NEXT: vmovdqu (%rsi), %xmm2 -; AVX2-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2 +; AVX2-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2 +; AVX2-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2 ; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: addq $16, %rsi -; AVX2-NEXT: addq $16, %rdi -; AVX2-NEXT: addq $-8, %rax +; AVX2-NEXT: addq $8, %rcx +; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB0_1 ; AVX2-NEXT: # BB#2: # %middle.block ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -60,17 +60,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly ; AVX512: # BB#0: # %entry ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB0_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX512-NEXT: vmovdqu (%rsi), %xmm2 -; AVX512-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2 +; AVX512-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2 +; AVX512-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2 ; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: addq $16, %rsi -; AVX512-NEXT: addq $16, %rdi -; AVX512-NEXT: addq $-8, %rax +; AVX512-NEXT: addq $8, %rcx +; AVX512-NEXT: cmpq %rcx, %rax ; AVX512-NEXT: jne .LBB0_1 ; AVX512-NEXT: # BB#2: # %middle.block ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -118,12 +118,13 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB1_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqu (%rdi), %xmm2 -; SSE2-NEXT: movdqu (%rsi), %xmm3 +; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2 +; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3 ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: pmulhuw %xmm2, %xmm4 ; SSE2-NEXT: pmullw %xmm2, %xmm3 @@ -132,9 +133,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; SSE2-NEXT: paddd %xmm2, %xmm0 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] ; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $16, %rsi -; SSE2-NEXT: addq $16, %rdi -; SSE2-NEXT: addq $-8, %rax +; SSE2-NEXT: addq $8, %rcx +; SSE2-NEXT: cmpq %rcx, %rax ; SSE2-NEXT: jne .LBB1_1 ; SSE2-NEXT: # BB#2: # %middle.block ; SSE2-NEXT: paddd %xmm1, %xmm0 @@ -149,6 +149,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX2: # BB#0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB1_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -156,9 +157,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: addq $16, %rsi -; AVX2-NEXT: addq $16, %rdi -; AVX2-NEXT: addq $-8, %rax +; AVX2-NEXT: addq $8, %rcx +; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB1_1 ; AVX2-NEXT: # BB#2: # %middle.block ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -174,6 +174,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX512: # BB#0: # %entry ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB1_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 @@ -181,9 +182,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX512-NEXT: vpmulld %ymm1, %ymm2, %ymm1 ; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: addq $16, %rsi -; AVX512-NEXT: addq $16, %rdi -; AVX512-NEXT: addq $-8, %rax +; AVX512-NEXT: addq $8, %rcx +; AVX512-NEXT: cmpq %rcx, %rax ; AVX512-NEXT: jne .LBB1_1 ; AVX512-NEXT: # BB#2: # %middle.block ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -231,6 +231,7 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pxor %xmm2, %xmm2 @@ -263,9 +264,8 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] ; SSE2-NEXT: psrad $16, %xmm4 ; SSE2-NEXT: paddd %xmm4, %xmm2 -; SSE2-NEXT: addq $16, %rsi -; SSE2-NEXT: addq $16, %rdi -; SSE2-NEXT: addq $-16, %rax +; SSE2-NEXT: addq $16, %rcx +; SSE2-NEXT: cmpq %rcx, %rax ; SSE2-NEXT: jne .LBB2_1 ; SSE2-NEXT: # BB#2: # %middle.block ; SSE2-NEXT: paddd %xmm3, %xmm0 @@ -282,17 +282,17 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; AVX2: # BB#0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB2_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX2-NEXT: vpmovsxbw (%rdi), %ymm2 -; AVX2-NEXT: vpmovsxbw (%rsi), %ymm3 +; AVX2-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2 +; AVX2-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3 ; AVX2-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: addq $16, %rsi -; AVX2-NEXT: addq $16, %rdi -; AVX2-NEXT: addq $-16, %rax +; AVX2-NEXT: addq $16, %rcx +; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB2_1 ; AVX2-NEXT: # BB#2: # %middle.block ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 @@ -309,18 +309,18 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; AVX512: # BB#0: # %entry ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0 +; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB2_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX512-NEXT: vpmovsxbw (%rdi), %ymm2 -; AVX512-NEXT: vpmovsxbw (%rsi), %ymm3 +; AVX512-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2 +; AVX512-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3 ; AVX512-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2 ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2 ; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0 -; AVX512-NEXT: addq $16, %rsi -; AVX512-NEXT: addq $16, %rdi -; AVX512-NEXT: addq $-16, %rax +; AVX512-NEXT: addq $16, %rcx +; AVX512-NEXT: cmpq %rcx, %rax ; AVX512-NEXT: jne .LBB2_1 ; AVX512-NEXT: # BB#2: # %middle.block ; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1] diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index 8c0a4d4f1752..61aa05a5270b 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: count_up ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: incq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_up(double* %d, i64 %n) nounwind { @@ -38,7 +38,7 @@ return: ; CHECK-LABEL: count_down ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_down(double* %d, i64 %n) nounwind { @@ -71,7 +71,7 @@ return: ; CHECK-LABEL: count_up_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: incq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_up_signed(double* %d, i64 %n) nounwind { @@ -106,7 +106,7 @@ return: ; CHECK-LABEL: count_down_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_down_signed(double* %d, i64 %n) nounwind { @@ -141,7 +141,7 @@ return: ; CHECK-LABEL: another_count_up ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_up(double* %d, i64 %n) nounwind { @@ -174,7 +174,7 @@ return: ; CHECK-LABEL: another_count_down ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq $-8, +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_down(double* %d, i64 %n) nounwind { @@ -207,7 +207,7 @@ return: ; CHECK-LABEL: another_count_up_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_up_signed(double* %d, i64 %n) nounwind { @@ -242,7 +242,7 @@ return: ; CHECK-LABEL: another_count_down_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: decq +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_down_signed(double* %d, i64 %n) nounwind { diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll index ce1bb3b06ce5..4e2475b1c67d 100644 --- a/test/CodeGen/X86/memcmp.ll +++ b/test/CodeGen/X86/memcmp.ll @@ -10,9 +10,28 @@ declare i32 @memcmp(i8*, i8*, i64) -define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length2(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length2: ; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $2 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length2: +; X64: # BB#0: +; X64-NEXT: movl $2, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length2_eq: +; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movzwl (%ecx), %ecx @@ -20,7 +39,7 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length2: +; X64-LABEL: length2_eq: ; X64: # BB#0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: cmpw (%rsi), %ax @@ -31,8 +50,8 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length2_const: +define i1 @length2_eq_const(i8* %X) nounwind { +; X32-LABEL: length2_eq_const: ; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzwl (%eax), %eax @@ -40,7 +59,7 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length2_const: +; X64-LABEL: length2_eq_const: ; X64: # BB#0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 @@ -51,8 +70,8 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind { -; X32-LABEL: length2_nobuiltin_attr: +define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length2_eq_nobuiltin_attr: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $2 @@ -64,7 +83,7 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length2_nobuiltin_attr: +; X64-LABEL: length2_eq_nobuiltin_attr: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $2, %edx @@ -78,9 +97,74 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length3(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length3: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length3: +; X64: # BB#0: +; X64-NEXT: movl $3, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length3_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length3_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $3, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length4: ; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $4 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length4: +; X64: # BB#0: +; X64-NEXT: movl $4, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length4_eq: +; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %ecx @@ -88,7 +172,7 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length4: +; X64-LABEL: length4_eq: ; X64: # BB#0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: cmpl (%rsi), %eax @@ -99,15 +183,15 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length4_const: +define i1 @length4_eq_const(i8* %X) nounwind { +; X32-LABEL: length4_eq_const: ; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length4_const: +; X64-LABEL: length4_eq_const: ; X64: # BB#0: ; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 ; X64-NEXT: sete %al @@ -117,7 +201,53 @@ define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length5(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length5: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length5: +; X64: # BB#0: +; X64-NEXT: movl $5, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length5_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length5_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $5, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length8: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -126,11 +256,30 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length8: +; X64: # BB#0: +; X64-NEXT: movl $8, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length8_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $8 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length8: +; X64-LABEL: length8_eq: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %rax ; X64-NEXT: cmpq (%rsi), %rax @@ -141,8 +290,8 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length8_const: +define i1 @length8_eq_const(i8* %X) nounwind { +; X32-LABEL: length8_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $8 @@ -154,7 +303,7 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length8_const: +; X64-LABEL: length8_eq_const: ; X64: # BB#0: ; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 ; X64-NEXT: cmpq %rax, (%rdi) @@ -165,7 +314,55 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length16(i8* %x, i8* %y) nounwind { +define i1 @length12_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length12_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length12_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $12, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length12: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length12: +; X64: # BB#0: +; X64-NEXT: movl $12, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length16: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -174,11 +371,30 @@ define i1 @length16(i8* %x, i8* %y) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length16: +; X64: # BB#0: +; X64-NEXT: movl $16, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length16_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $16 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; SSE2-LABEL: length16: +; SSE2-LABEL: length16_eq: ; SSE2: # BB#0: ; SSE2-NEXT: movdqu (%rsi), %xmm0 ; SSE2-NEXT: movdqu (%rdi), %xmm1 @@ -188,7 +404,7 @@ define i1 @length16(i8* %x, i8* %y) nounwind { ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; -; AVX2-LABEL: length16: +; AVX2-LABEL: length16_eq: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 ; AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 @@ -201,8 +417,8 @@ define i1 @length16(i8* %x, i8* %y) nounwind { ret i1 %cmp } -define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length16_const: +define i1 @length16_eq_const(i8* %X) nounwind { +; X32-LABEL: length16_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $16 @@ -214,7 +430,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; SSE2-LABEL: length16_const: +; SSE2-LABEL: length16_eq_const: ; SSE2: # BB#0: ; SSE2-NEXT: movdqu (%rdi), %xmm0 ; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 @@ -223,7 +439,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; -; AVX2-LABEL: length16_const: +; AVX2-LABEL: length16_eq_const: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 ; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0 @@ -236,7 +452,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length32(i8* %x, i8* %y) nounwind { +define i32 @length32(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length32: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -245,11 +461,32 @@ define i1 @length32(i8* %x, i8* %y) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length32: +; X64: # BB#0: +; X64-NEXT: movl $32, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length32_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $32 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; SSE2-LABEL: length32: +; SSE2-LABEL: length32_eq: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movl $32, %edx @@ -259,7 +496,7 @@ define i1 @length32(i8* %x, i8* %y) nounwind { ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; -; AVX2-LABEL: length32: +; AVX2-LABEL: length32_eq: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %ymm0 ; AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 @@ -273,8 +510,8 @@ define i1 @length32(i8* %x, i8* %y) nounwind { ret i1 %cmp } -define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length32_const: +define i1 @length32_eq_const(i8* %X) nounwind { +; X32-LABEL: length32_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $32 @@ -286,7 +523,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; SSE2-LABEL: length32_const: +; SSE2-LABEL: length32_eq_const: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movl $.L.str, %esi @@ -297,7 +534,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; -; AVX2-LABEL: length32_const: +; AVX2-LABEL: length32_eq_const: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %ymm0 ; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0 @@ -311,7 +548,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length64(i8* %x, i8* %y) nounwind { +define i32 @length64(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length64: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -320,11 +557,30 @@ define i1 @length64(i8* %x, i8* %y) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length64: +; X64: # BB#0: +; X64-NEXT: movl $64, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length64_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $64 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length64: +; X64-LABEL: length64_eq: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $64, %edx @@ -338,8 +594,8 @@ define i1 @length64(i8* %x, i8* %y) nounwind { ret i1 %cmp } -define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length64_const: +define i1 @length64_eq_const(i8* %X) nounwind { +; X32-LABEL: length64_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $64 @@ -351,7 +607,7 @@ define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length64_const: +; X64-LABEL: length64_eq_const: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $.L.str, %esi diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll index 71417694b0d4..1d5829407b71 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -269,10 +269,8 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s ; SSE2-LABEL: merge_4f32_f32_012u: ; SSE2: # BB#0: ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_012u: @@ -290,11 +288,11 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s ; X32-SSE1-LABEL: merge_4f32_f32_012u: ; X32-SSE1: # BB#0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; ; X32-SSE41-LABEL: merge_4f32_f32_012u: @@ -320,10 +318,8 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s ; SSE2-LABEL: merge_4f32_f32_019u: ; SSE2: # BB#0: ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_019u: @@ -341,11 +337,11 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s ; X32-SSE1-LABEL: merge_4f32_f32_019u: ; X32-SSE1: # BB#0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; ; X32-SSE41-LABEL: merge_4f32_f32_019u: @@ -1037,13 +1033,11 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp { ; SSE2-LABEL: merge_4f32_f32_2345_volatile: ; SSE2: # BB#0: -; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_2345_volatile: @@ -1065,13 +1059,13 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n ; X32-SSE1-LABEL: merge_4f32_f32_2345_volatile: ; X32-SSE1: # BB#0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; ; X32-SSE41-LABEL: merge_4f32_f32_2345_volatile: diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll index e3e2737cf3e6..7b39bfe1c484 100644 --- a/test/CodeGen/X86/mul-constant-i16.ll +++ b/test/CodeGen/X86/mul-constant-i16.ll @@ -188,13 +188,16 @@ define i16 @test_mul_by_11(i16 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $11, %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_11: ; X64: # BB#0: -; X64-NEXT: imull $11, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,2), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 11 @@ -225,13 +228,16 @@ define i16 @test_mul_by_13(i16 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $13, %eax, %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_13: ; X64: # BB#0: -; X64-NEXT: imull $13, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 13 @@ -241,14 +247,19 @@ define i16 @test_mul_by_13(i16 %x) { define i16 @test_mul_by_14(i16 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $14, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_14: ; X64: # BB#0: -; X64-NEXT: imull $14, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 14 @@ -337,14 +348,19 @@ define i16 @test_mul_by_18(i16 %x) { define i16 @test_mul_by_19(i16 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $19, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: shll $2, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_19: ; X64: # BB#0: -; X64-NEXT: imull $19, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: shll $2, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 19 @@ -375,13 +391,16 @@ define i16 @test_mul_by_21(i16 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $21, %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_21: ; X64: # BB#0: -; X64-NEXT: imull $21, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 21 @@ -391,14 +410,19 @@ define i16 @test_mul_by_21(i16 %x) { define i16 @test_mul_by_22(i16 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $22, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_22: ; X64: # BB#0: -; X64-NEXT: imull $22, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 22 @@ -408,14 +432,19 @@ define i16 @test_mul_by_22(i16 %x) { define i16 @test_mul_by_23(i16 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $23, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: shll $3, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_23: ; X64: # BB#0: -; X64-NEXT: imull $23, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: shll $3, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 23 @@ -465,14 +494,19 @@ define i16 @test_mul_by_25(i16 %x) { define i16 @test_mul_by_26(i16 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $26, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_26: ; X64: # BB#0: -; X64-NEXT: imull $26, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 26 @@ -502,14 +536,19 @@ define i16 @test_mul_by_27(i16 %x) { define i16 @test_mul_by_28(i16 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $28, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_28: ; X64: # BB#0: -; X64-NEXT: imull $28, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 28 @@ -519,14 +558,21 @@ define i16 @test_mul_by_28(i16 %x) { define i16 @test_mul_by_29(i16 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $29, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_29: ; X64: # BB#0: -; X64-NEXT: imull $29, %edi, %eax +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: addl %edi, %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 29 @@ -536,14 +582,20 @@ define i16 @test_mul_by_29(i16 %x) { define i16 @test_mul_by_30(i16 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $30, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX %AX %EAX ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_30: ; X64: # BB#0: -; X64-NEXT: imull $30, %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: subl %edi, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX %AX %EAX ; X64-NEXT: retq %mul = mul nsw i16 %x, 30 @@ -587,3 +639,30 @@ define i16 @test_mul_by_32(i16 %x) { %mul = mul nsw i16 %x, 32 ret i16 %mul } + +; (x*9+42)*(x*5+2) +define i16 @test_mul_spec(i16 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NEXT: imull %ecx, %eax +; X86-NEXT: # kill: %AX %AX %EAX +; X86-NEXT: retl +; +; X64-LABEL: test_mul_spec: +; X64: # BB#0: +; X64-NEXT: # kill: %EDI %EDI %RDI +; X64-NEXT: leal 42(%rdi,%rdi,8), %ecx +; X64-NEXT: leal 2(%rdi,%rdi,4), %eax +; X64-NEXT: imull %ecx, %eax +; X64-NEXT: # kill: %AX %AX %EAX +; X64-NEXT: retq + %mul = mul nsw i16 %x, 9 + %add = add nsw i16 %mul, 42 + %mul2 = mul nsw i16 %x, 5 + %add2 = add nsw i16 %mul2, 2 + %mul3 = mul nsw i16 %add, %add2 + ret i16 %mul3 +} diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll index 76e46e1f1b09..d545b477e102 100644 --- a/test/CodeGen/X86/mul-constant-i32.ll +++ b/test/CodeGen/X86/mul-constant-i32.ll @@ -1,6 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG +; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT define i32 @test_mul_by_1(i32 %x) { ; X86-LABEL: test_mul_by_1: @@ -8,10 +14,40 @@ define i32 @test_mul_by_1(i32 %x) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_1: -; X64: # BB#0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_1: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_1: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_1: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_1: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_1: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_1: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_1: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 1 ret i32 %mul } @@ -23,11 +59,47 @@ define i32 @test_mul_by_2(i32 %x) { ; X86-NEXT: addl %eax, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_2: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_2: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_2: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_2: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: addl %eax, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_2: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_2: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_2: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_2: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 2 ret i32 %mul } @@ -38,11 +110,46 @@ define i32 @test_mul_by_3(i32 %x) { ; X86-NEXT: imull $3, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_3: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_3: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_3: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_3: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_3: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_3: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_3: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_3: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 3 ret i32 %mul } @@ -54,11 +161,47 @@ define i32 @test_mul_by_4(i32 %x) { ; X86-NEXT: shll $2, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_4: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_4: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_4: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_4: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $2, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_4: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_4: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_4: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_4: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 4 ret i32 %mul } @@ -69,11 +212,46 @@ define i32 @test_mul_by_5(i32 %x) { ; X86-NEXT: imull $5, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_5: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_5: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_5: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_5: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_5: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_5: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_5: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_5: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 5 ret i32 %mul } @@ -86,12 +264,46 @@ define i32 @test_mul_by_6(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_6: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_6: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_6: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_6: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_6: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_6: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_6: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_6: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 6 ret i32 %mul } @@ -104,12 +316,46 @@ define i32 @test_mul_by_7(i32 %x) { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_7: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (,%rdi,8), %eax -; X64-NEXT: subl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_7: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_7: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_7: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_7: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_7: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_7: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_7: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 7 ret i32 %mul } @@ -121,11 +367,47 @@ define i32 @test_mul_by_8(i32 %x) { ; X86-NEXT: shll $3, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_8: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_8: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_8: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_8: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $3, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_8: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_8: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_8: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_8: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 8 ret i32 %mul } @@ -136,11 +418,46 @@ define i32 @test_mul_by_9(i32 %x) { ; X86-NEXT: imull $9, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_9: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_9: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_9: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_9: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_9: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_9: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_9: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_9: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 9 ret i32 %mul } @@ -153,12 +470,46 @@ define i32 @test_mul_by_10(i32 %x) { ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_10: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_10: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_10: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_10: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_10: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_10: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_10: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_10: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 10 ret i32 %mul } @@ -166,13 +517,49 @@ define i32 @test_mul_by_10(i32 %x) { define i32 @test_mul_by_11(i32 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: -; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_11: -; X64: # BB#0: -; X64-NEXT: imull $11, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_11: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_11: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_11: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_11: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_11: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_11: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_11: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 11 ret i32 %mul } @@ -185,12 +572,46 @@ define i32 @test_mul_by_12(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_12: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: shll $2, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_12: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_12: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_12: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_12: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_12: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_12: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_12: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 12 ret i32 %mul } @@ -198,13 +619,49 @@ define i32 @test_mul_by_12(i32 %x) { define i32 @test_mul_by_13(i32 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: -; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_13: -; X64: # BB#0: -; X64-NEXT: imull $13, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_13: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_13: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_13: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_13: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_13: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_13: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_13: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 13 ret i32 %mul } @@ -212,13 +669,52 @@ define i32 @test_mul_by_13(i32 %x) { define i32 @test_mul_by_14(i32 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_14: -; X64: # BB#0: -; X64-NEXT: imull $14, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_14: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_14: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_14: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_14: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_14: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_14: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_14: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 14 ret i32 %mul } @@ -231,12 +727,46 @@ define i32 @test_mul_by_15(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_15: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_15: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_15: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_15: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_15: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_15: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_15: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_15: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 15 ret i32 %mul } @@ -248,11 +778,47 @@ define i32 @test_mul_by_16(i32 %x) { ; X86-NEXT: shll $4, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_16: -; X64: # BB#0: -; X64-NEXT: shll $4, %edi -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_16: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_16: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50] +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_16: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $4, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_16: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_16: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_16: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00] +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_16: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 16 ret i32 %mul } @@ -266,13 +832,49 @@ define i32 @test_mul_by_17(i32 %x) { ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_17: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $4, %eax -; X64-NEXT: leal (%rax,%rdi), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_17: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_17: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_17: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_17: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_17: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_17: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_17: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 17 ret i32 %mul } @@ -285,12 +887,46 @@ define i32 @test_mul_by_18(i32 %x) { ; X86-NEXT: leal (%eax,%eax,8), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_18: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_18: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_18: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_18: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_18: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_18: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_18: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_18: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 18 ret i32 %mul } @@ -298,13 +934,52 @@ define i32 @test_mul_by_18(i32 %x) { define i32 @test_mul_by_19(i32 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: -; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: shll $2, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_19: -; X64: # BB#0: -; X64-NEXT: imull $19, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_19: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_19: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $2, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_19: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_19: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_19: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_19: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_19: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 19 ret i32 %mul } @@ -317,12 +992,46 @@ define i32 @test_mul_by_20(i32 %x) { ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_20: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: shll $2, %edi -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_20: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_20: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_20: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_20: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_20: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_20: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_20: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 20 ret i32 %mul } @@ -330,13 +1039,49 @@ define i32 @test_mul_by_20(i32 %x) { define i32 @test_mul_by_21(i32 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: -; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_21: -; X64: # BB#0: -; X64-NEXT: imull $21, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_21: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_21: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_21: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_21: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_21: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_21: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_21: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 21 ret i32 %mul } @@ -344,13 +1089,52 @@ define i32 @test_mul_by_21(i32 %x) { define i32 @test_mul_by_22(i32 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_22: -; X64: # BB#0: -; X64-NEXT: imull $22, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_22: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_22: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_22: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_22: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_22: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_22: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_22: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 22 ret i32 %mul } @@ -358,13 +1142,52 @@ define i32 @test_mul_by_22(i32 %x) { define i32 @test_mul_by_23(i32 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: -; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: shll $3, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_23: -; X64: # BB#0: -; X64-NEXT: imull $23, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_23: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_23: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_23: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_23: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_23: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_23: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_23: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 23 ret i32 %mul } @@ -377,12 +1200,46 @@ define i32 @test_mul_by_24(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_24: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: shll $3, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_24: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_24: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_24: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_24: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_24: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_24: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_24: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 24 ret i32 %mul } @@ -395,12 +1252,46 @@ define i32 @test_mul_by_25(i32 %x) { ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_25: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: leal (%rax,%rax,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_25: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_25: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_25: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_25: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_25: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_25: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_25: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 25 ret i32 %mul } @@ -408,13 +1299,52 @@ define i32 @test_mul_by_25(i32 %x) { define i32 @test_mul_by_26(i32 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: -; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_26: -; X64: # BB#0: -; X64-NEXT: imull $26, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_26: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_26: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_26: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_26: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_26: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_26: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_26: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 26 ret i32 %mul } @@ -427,12 +1357,46 @@ define i32 @test_mul_by_27(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_27: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI %EDI %RDI -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_27: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_27: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_27: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_27: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_27: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_27: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_27: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 27 ret i32 %mul } @@ -440,13 +1404,52 @@ define i32 @test_mul_by_27(i32 %x) { define i32 @test_mul_by_28(i32 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: -; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_28: -; X64: # BB#0: -; X64-NEXT: imull $28, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_28: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_28: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_28: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_28: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_28: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_28: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_28: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 28 ret i32 %mul } @@ -454,13 +1457,55 @@ define i32 @test_mul_by_28(i32 %x) { define i32 @test_mul_by_29(i32 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_29: -; X64: # BB#0: -; X64-NEXT: imull $29, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_29: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_29: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_29: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_29: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_29: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_29: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_29: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 29 ret i32 %mul } @@ -468,13 +1513,53 @@ define i32 @test_mul_by_29(i32 %x) { define i32 @test_mul_by_30(i32 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: -; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_30: -; X64: # BB#0: -; X64-NEXT: imull $30, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_30: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_30: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_30: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_30: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_30: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_30: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_30: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 30 ret i32 %mul } @@ -488,12 +1573,46 @@ define i32 @test_mul_by_31(i32 %x) { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_31: -; X64: # BB#0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $5, %eax -; X64-NEXT: subl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_31: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_31: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_31: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_31: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_31: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_31: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] +; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_31: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 31 ret i32 %mul } @@ -505,11 +1624,124 @@ define i32 @test_mul_by_32(i32 %x) { ; X86-NEXT: shll $5, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_32: -; X64: # BB#0: -; X64-NEXT: shll $5, %edi -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_32: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_32: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50] +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_32: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $5, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_32: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_32: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_32: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00] +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_32: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 32 ret i32 %mul } + +; (x*9+42)*(x*5+2) +define i32 @test_mul_spec(i32 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NEXT: imull %ecx, %eax +; X86-NEXT: retl +; +; X64-HSW-LABEL: test_mul_spec: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] +; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25] +; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_spec: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI %EDI %RDI +; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50] +; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_spec: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NOOPT-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NOOPT-NEXT: imull %ecx, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_spec: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] +; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25] +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_spec: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50] +; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_spec: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI %EDI %RDI +; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00] +; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_spec: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI %EDI %RDI +; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00] +; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] + %mul = mul nsw i32 %x, 9 + %add = add nsw i32 %mul, 42 + %mul2 = mul nsw i32 %x, 5 + %add2 = add nsw i32 %mul2, 2 + %mul3 = mul nsw i32 %add, %add2 + ret i32 %mul3 +} diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll index 8579179a8231..ea841c761c7b 100644 --- a/test/CodeGen/X86/mul-constant-i64.ll +++ b/test/CodeGen/X86/mul-constant-i64.ll @@ -1,18 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG +; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT -define i64 @test_mul_by_1(i64 %x) { +define i64 @test_mul_by_1(i64 %x) nounwind { ; X86-LABEL: test_mul_by_1: ; X86: # BB#0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_1: -; X64: # BB#0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_1: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_1: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_1: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_1: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_1: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_1: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_1: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 1 ret i64 %mul } @@ -26,10 +63,43 @@ define i64 @test_mul_by_2(i64 %x) { ; X86-NEXT: addl %eax, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_2: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_2: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_2: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_2: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $1, %eax, %edx +; X86-NOOPT-NEXT: addl %eax, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_2: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_2: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_2: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_2: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 2 ret i64 %mul } @@ -43,10 +113,43 @@ define i64 @test_mul_by_3(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_3: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_3: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_3: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_3: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $3, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_3: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_3: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_3: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_3: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 3 ret i64 %mul } @@ -60,10 +163,43 @@ define i64 @test_mul_by_4(i64 %x) { ; X86-NEXT: shll $2, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_4: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_4: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_4: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_4: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $2, %eax, %edx +; X86-NOOPT-NEXT: shll $2, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_4: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_4: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_4: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_4: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 4 ret i64 %mul } @@ -77,10 +213,43 @@ define i64 @test_mul_by_5(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_5: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_5: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_5: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_5: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $5, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_5: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_5: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_5: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_5: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 5 ret i64 %mul } @@ -95,11 +264,46 @@ define i64 @test_mul_by_6(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_6: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_6: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_6: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_6: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $6, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_6: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_6: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_6: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_6: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 6 ret i64 %mul } @@ -115,11 +319,46 @@ define i64 @test_mul_by_7(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_7: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,8), %rax -; X64-NEXT: subq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_7: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_7: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_7: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $7, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_7: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_7: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_7: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_7: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 7 ret i64 %mul } @@ -133,10 +372,43 @@ define i64 @test_mul_by_8(i64 %x) { ; X86-NEXT: shll $3, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_8: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_8: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_8: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_8: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $3, %eax, %edx +; X86-NOOPT-NEXT: shll $3, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_8: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_8: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_8: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_8: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 8 ret i64 %mul } @@ -150,10 +422,43 @@ define i64 @test_mul_by_9(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_9: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_9: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_9: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_9: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $9, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_9: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_9: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_9: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_9: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 9 ret i64 %mul } @@ -168,11 +473,46 @@ define i64 @test_mul_by_10(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_10: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_10: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_10: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_10: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $10, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_10: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_10: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_10: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_10: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 10 ret i64 %mul } @@ -180,16 +520,53 @@ define i64 @test_mul_by_10(i64 %x) { define i64 @test_mul_by_11(i64 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %ecx ; X86-NEXT: movl $11, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_11: -; X64: # BB#0: -; X64-NEXT: imulq $11, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_11: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_11: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_11: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $11, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_11: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_11: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_11: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_11: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 11 ret i64 %mul } @@ -204,11 +581,46 @@ define i64 @test_mul_by_12(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,4), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_12: -; X64: # BB#0: -; X64-NEXT: shlq $2, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_12: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_12: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_12: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $12, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_12: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_12: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_12: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_12: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 12 ret i64 %mul } @@ -216,16 +628,53 @@ define i64 @test_mul_by_12(i64 %x) { define i64 @test_mul_by_13(i64 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl $13, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_13: -; X64: # BB#0: -; X64-NEXT: imulq $13, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_13: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_13: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_13: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $13, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_13: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_13: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_13: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_13: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 13 ret i64 %mul } @@ -233,16 +682,56 @@ define i64 @test_mul_by_13(i64 %x) { define i64 @test_mul_by_14(i64 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $14, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_14: -; X64: # BB#0: -; X64-NEXT: imulq $14, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_14: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_14: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_14: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $14, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_14: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_14: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_14: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_14: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 14 ret i64 %mul } @@ -258,11 +747,46 @@ define i64 @test_mul_by_15(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_15: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: leaq (%rax,%rax,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_15: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_15: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_15: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $15, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_15: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_15: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_15: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_15: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 15 ret i64 %mul } @@ -276,11 +800,49 @@ define i64 @test_mul_by_16(i64 %x) { ; X86-NEXT: shll $4, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_16: -; X64: # BB#0: -; X64-NEXT: shlq $4, %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_16: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_16: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_16: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $4, %eax, %edx +; X86-NOOPT-NEXT: shll $4, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_16: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_16: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_16: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_16: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 16 ret i64 %mul } @@ -297,12 +859,49 @@ define i64 @test_mul_by_17(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_17: -; X64: # BB#0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shlq $4, %rax -; X64-NEXT: leaq (%rax,%rdi), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_17: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_17: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_17: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $17, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_17: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_17: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_17: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] +; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_17: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 17 ret i64 %mul } @@ -317,11 +916,46 @@ define i64 @test_mul_by_18(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_18: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_18: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_18: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_18: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $18, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_18: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_18: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_18: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_18: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 18 ret i64 %mul } @@ -329,16 +963,56 @@ define i64 @test_mul_by_18(i64 %x) { define i64 @test_mul_by_19(i64 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: shll $2, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $19, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_19: -; X64: # BB#0: -; X64-NEXT: imulq $19, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_19: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_19: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $2, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_19: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $19, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_19: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_19: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_19: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_19: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 19 ret i64 %mul } @@ -353,11 +1027,46 @@ define i64 @test_mul_by_20(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,4), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_20: -; X64: # BB#0: -; X64-NEXT: shlq $2, %rdi -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_20: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_20: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_20: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $20, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_20: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_20: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_20: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_20: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 20 ret i64 %mul } @@ -365,16 +1074,53 @@ define i64 @test_mul_by_20(i64 %x) { define i64 @test_mul_by_21(i64 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl $21, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_21: -; X64: # BB#0: -; X64-NEXT: imulq $21, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_21: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_21: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_21: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $21, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_21: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_21: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_21: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_21: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 21 ret i64 %mul } @@ -382,16 +1128,56 @@ define i64 @test_mul_by_21(i64 %x) { define i64 @test_mul_by_22(i64 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $22, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_22: -; X64: # BB#0: -; X64-NEXT: imulq $22, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_22: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_22: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_22: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $22, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_22: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_22: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_22: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_22: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 22 ret i64 %mul } @@ -399,16 +1185,56 @@ define i64 @test_mul_by_22(i64 %x) { define i64 @test_mul_by_23(i64 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: shll $3, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $23, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_23: -; X64: # BB#0: -; X64-NEXT: imulq $23, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_23: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_23: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_23: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $23, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_23: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_23: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_23: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_23: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 23 ret i64 %mul } @@ -423,11 +1249,46 @@ define i64 @test_mul_by_24(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,8), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_24: -; X64: # BB#0: -; X64-NEXT: shlq $3, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_24: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_24: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_24: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $24, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_24: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_24: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_24: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_24: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 24 ret i64 %mul } @@ -443,11 +1304,46 @@ define i64 @test_mul_by_25(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_25: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: leaq (%rax,%rax,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_25: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_25: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_25: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $25, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_25: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_25: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_25: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_25: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 25 ret i64 %mul } @@ -455,16 +1351,56 @@ define i64 @test_mul_by_25(i64 %x) { define i64 @test_mul_by_26(i64 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $26, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_26: -; X64: # BB#0: -; X64-NEXT: imulq $26, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_26: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_26: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_26: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $26, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_26: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_26: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_26: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_26: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 26 ret i64 %mul } @@ -480,11 +1416,46 @@ define i64 @test_mul_by_27(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_27: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: leaq (%rax,%rax,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_27: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_27: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_27: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $27, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_27: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_27: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_27: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_27: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 27 ret i64 %mul } @@ -492,16 +1463,56 @@ define i64 @test_mul_by_27(i64 %x) { define i64 @test_mul_by_28(i64 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $28, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_28: -; X64: # BB#0: -; X64-NEXT: imulq $28, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_28: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_28: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_28: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $28, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_28: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_28: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_28: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_28: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 28 ret i64 %mul } @@ -509,16 +1520,59 @@ define i64 @test_mul_by_28(i64 %x) { define i64 @test_mul_by_29(i64 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $29, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_29: -; X64: # BB#0: -; X64-NEXT: imulq $29, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_29: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_29: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_29: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $29, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_29: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_29: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_29: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_29: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 29 ret i64 %mul } @@ -526,16 +1580,59 @@ define i64 @test_mul_by_29(i64 %x) { define i64 @test_mul_by_30(i64 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $30, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_30: -; X64: # BB#0: -; X64-NEXT: imulq $30, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_30: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_30: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_30: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $30, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_30: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_30: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_30: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_30: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 30 ret i64 %mul } @@ -552,12 +1649,49 @@ define i64 @test_mul_by_31(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_31: -; X64: # BB#0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shlq $5, %rax -; X64-NEXT: subq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_31: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_31: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_31: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $31, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_31: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_31: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_31: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] +; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_31: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 31 ret i64 %mul } @@ -571,11 +1705,168 @@ define i64 @test_mul_by_32(i64 %x) { ; X86-NEXT: shll $5, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_32: -; X64: # BB#0: -; X64-NEXT: shlq $5, %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_32: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_32: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_32: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $5, %eax, %edx +; X86-NOOPT-NEXT: shll $5, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_32: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_32: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_32: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_32: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 32 ret i64 %mul } + +; (x*9+42)*(x*5+2) +define i64 @test_mul_spec(i64 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl $9, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: leal (%edi,%edi,8), %ebx +; X86-NEXT: addl $42, %esi +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: movl $5, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: leal (%edi,%edi,4), %edi +; X86-NEXT: addl $2, %ecx +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: imull %esi, %edi +; X86-NEXT: addl %edi, %edx +; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-HSW-LABEL: test_mul_spec: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50] +; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25] +; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_spec: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50] +; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_spec: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: pushl %ebx +; X86-NOOPT-NEXT: pushl %edi +; X86-NOOPT-NEXT: pushl %esi +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOOPT-NEXT: movl $9, %edx +; X86-NOOPT-NEXT: movl %ecx, %eax +; X86-NOOPT-NEXT: mull %edx +; X86-NOOPT-NEXT: movl %eax, %esi +; X86-NOOPT-NEXT: leal (%edi,%edi,8), %ebx +; X86-NOOPT-NEXT: addl $42, %esi +; X86-NOOPT-NEXT: adcl %edx, %ebx +; X86-NOOPT-NEXT: movl $5, %edx +; X86-NOOPT-NEXT: movl %ecx, %eax +; X86-NOOPT-NEXT: mull %edx +; X86-NOOPT-NEXT: movl %eax, %ecx +; X86-NOOPT-NEXT: leal (%edi,%edi,4), %edi +; X86-NOOPT-NEXT: addl $2, %ecx +; X86-NOOPT-NEXT: adcl %edx, %edi +; X86-NOOPT-NEXT: movl %esi, %eax +; X86-NOOPT-NEXT: mull %ecx +; X86-NOOPT-NEXT: imull %esi, %edi +; X86-NOOPT-NEXT: addl %edi, %edx +; X86-NOOPT-NEXT: imull %ebx, %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: popl %esi +; X86-NOOPT-NEXT: popl %edi +; X86-NOOPT-NEXT: popl %ebx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_spec: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50] +; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25] +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_spec: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50] +; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_spec: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00] +; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_spec: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00] +; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] + %mul = mul nsw i64 %x, 9 + %add = add nsw i64 %mul, 42 + %mul2 = mul nsw i64 %x, 5 + %add2 = add nsw i64 %mul2, 2 + %mul3 = mul nsw i64 %add, %add2 + ret i64 %mul3 +} diff --git a/test/CodeGen/X86/mul-constant-result.ll b/test/CodeGen/X86/mul-constant-result.ll new file mode 100644 index 000000000000..65d80a699e24 --- /dev/null +++ b/test/CodeGen/X86/mul-constant-result.ll @@ -0,0 +1,1291 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @mult(i32, i32) local_unnamed_addr #0 { +; X86-LABEL: mult: +; X86: # BB#0: +; X86-NEXT: pushl %esi +; X86-NEXT: .Lcfi0: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .Lcfi1: +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpl $1, %edx +; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: jg .LBB0_2 +; X86-NEXT: # BB#1: +; X86-NEXT: movl %edx, %esi +; X86-NEXT: .LBB0_2: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: je .LBB0_4 +; X86-NEXT: # BB#3: +; X86-NEXT: movl %esi, %eax +; X86-NEXT: .LBB0_4: +; X86-NEXT: decl %ecx +; X86-NEXT: cmpl $31, %ecx +; X86-NEXT: ja .LBB0_39 +; X86-NEXT: # BB#5: +; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4) +; X86-NEXT: .LBB0_6: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_39: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB0_40: +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_7: +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_8: +; X86-NEXT: shll $2, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_9: +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_10: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_11: +; X86-NEXT: leal (,%eax,8), %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_13: +; X86-NEXT: shll $3, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_14: +; X86-NEXT: leal (%eax,%eax,8), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_15: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_16: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_17: +; X86-NEXT: shll $2, %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_18: +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_19: +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: jmp .LBB0_20 +; X86-NEXT: .LBB0_21: +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_22: +; X86-NEXT: shll $4, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_23: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $4, %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_24: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: leal (%eax,%eax,8), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_25: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: shll $2, %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_26: +; X86-NEXT: shll $2, %eax +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_27: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_28: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: .LBB0_20: +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_29: +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: shll $3, %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_30: +; X86-NEXT: shll $3, %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_31: +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_32: +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_33: +; X86-NEXT: leal (%eax,%eax,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_34: +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_35: +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_36: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_37: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: .LBB0_12: +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_38: +; X86-NEXT: shll $5, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-HSW-LABEL: mult: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI %EDI %RDI +; X64-HSW-NEXT: cmpl $1, %esi +; X64-HSW-NEXT: movl $1, %ecx +; X64-HSW-NEXT: movl %esi, %eax +; X64-HSW-NEXT: cmovgl %ecx, %eax +; X64-HSW-NEXT: testl %esi, %esi +; X64-HSW-NEXT: cmovel %ecx, %eax +; X64-HSW-NEXT: addl $-1, %edi +; X64-HSW-NEXT: cmpl $31, %edi +; X64-HSW-NEXT: ja .LBB0_36 +; X64-HSW-NEXT: # BB#1: +; X64-HSW-NEXT: jmpq *.LJTI0_0(,%rdi,8) +; X64-HSW-NEXT: .LBB0_2: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_36: +; X64-HSW-NEXT: xorl %eax, %eax +; X64-HSW-NEXT: .LBB0_37: +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_3: +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_4: +; X64-HSW-NEXT: shll $2, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_5: +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_6: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_7: +; X64-HSW-NEXT: leal (,%rax,8), %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_9: +; X64-HSW-NEXT: shll $3, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_10: +; X64-HSW-NEXT: leal (%rax,%rax,8), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_11: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_12: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_13: +; X64-HSW-NEXT: shll $2, %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_14: +; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx +; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_15: +; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx +; X64-HSW-NEXT: jmp .LBB0_16 +; X64-HSW-NEXT: .LBB0_18: +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_19: +; X64-HSW-NEXT: shll $4, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_20: +; X64-HSW-NEXT: movl %eax, %ecx +; X64-HSW-NEXT: shll $4, %ecx +; X64-HSW-NEXT: jmp .LBB0_17 +; X64-HSW-NEXT: .LBB0_21: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: leal (%rax,%rax,8), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_22: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: shll $2, %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_23: +; X64-HSW-NEXT: shll $2, %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_24: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_25: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: .LBB0_16: +; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx +; X64-HSW-NEXT: jmp .LBB0_17 +; X64-HSW-NEXT: .LBB0_26: +; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx +; X64-HSW-NEXT: shll $3, %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_27: +; X64-HSW-NEXT: shll $3, %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_28: +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_29: +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_30: +; X64-HSW-NEXT: leal (%rax,%rax,8), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_31: +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-HSW-NEXT: jmp .LBB0_17 +; X64-HSW-NEXT: .LBB0_32: +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-HSW-NEXT: addl %eax, %ecx +; X64-HSW-NEXT: .LBB0_17: +; X64-HSW-NEXT: addl %eax, %ecx +; X64-HSW-NEXT: movl %ecx, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_33: +; X64-HSW-NEXT: movl %eax, %ecx +; X64-HSW-NEXT: shll $5, %ecx +; X64-HSW-NEXT: subl %eax, %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_34: +; X64-HSW-NEXT: movl %eax, %ecx +; X64-HSW-NEXT: shll $5, %ecx +; X64-HSW-NEXT: .LBB0_8: +; X64-HSW-NEXT: subl %eax, %ecx +; X64-HSW-NEXT: movl %ecx, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_35: +; X64-HSW-NEXT: shll $5, %eax +; X64-HSW-NEXT: # kill: %EAX %EAX %RAX +; X64-HSW-NEXT: retq + %3 = icmp eq i32 %1, 0 + %4 = icmp sgt i32 %1, 1 + %5 = or i1 %3, %4 + %6 = select i1 %5, i32 1, i32 %1 + switch i32 %0, label %69 [ + i32 1, label %70 + i32 2, label %7 + i32 3, label %9 + i32 4, label %11 + i32 5, label %13 + i32 6, label %15 + i32 7, label %17 + i32 8, label %19 + i32 9, label %21 + i32 10, label %23 + i32 11, label %25 + i32 12, label %27 + i32 13, label %29 + i32 14, label %31 + i32 15, label %33 + i32 16, label %35 + i32 17, label %37 + i32 18, label %39 + i32 19, label %41 + i32 20, label %43 + i32 21, label %45 + i32 22, label %47 + i32 23, label %49 + i32 24, label %51 + i32 25, label %53 + i32 26, label %55 + i32 27, label %57 + i32 28, label %59 + i32 29, label %61 + i32 30, label %63 + i32 31, label %65 + i32 32, label %67 + ] + +;