From 7ab83427af0f77b59941ceba41d509d7d097b065 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Sat, 10 Jun 2017 13:44:06 +0000
Subject: [PATCH] Vendor import of llvm trunk r305145:
 https://llvm.org/svn/llvm-project/llvm/trunk@305145

---
 CMakeLists.txt                                |    6 +-
 bindings/go/llvm/ir.go                        |    6 +
 bindings/go/llvm/ir_test.go                   |   26 +
 bindings/ocaml/llvm/llvm.ml                   |    2 +
 bindings/ocaml/llvm/llvm.mli                  |    3 +
 bindings/ocaml/llvm/llvm_ocaml.c              |   14 +
 cmake/modules/HandleLLVMOptions.cmake         |    7 +
 cmake/modules/TableGen.cmake                  |   50 +-
 docs/AMDGPUUsage.rst                          | 3558 ++++++++++++++++-
 docs/CodeGenerator.rst                        |   59 +-
 docs/CompilerWriterInfo.rst                   |   11 +-
 docs/LangRef.rst                              |   33 +-
 docs/ReleaseNotes.rst                         |   11 +
 docs/index.rst                                |    4 +-
 examples/ExceptionDemo/ExceptionDemo.cpp      |    4 +-
 include/llvm-c/Core.h                         |   14 +
 include/llvm-c/ExecutionEngine.h              |    2 +-
 include/llvm-c/Support.h                      |    2 +-
 include/llvm-c/TargetMachine.h                |    2 +-
 include/llvm/ADT/APInt.h                      |    5 +
 include/llvm/ADT/GraphTraits.h                |    1 -
 include/llvm/ADT/ImmutableSet.h               |    6 +-
 include/llvm/ADT/PointerUnion.h               |    2 +-
 include/llvm/ADT/PostOrderIterator.h          |    2 +-
 include/llvm/ADT/PriorityWorklist.h           |    2 +-
 include/llvm/ADT/SCCIterator.h                |   10 -
 include/llvm/ADT/SmallPtrSet.h                |    6 +-
 include/llvm/ADT/SmallVector.h                |   21 +-
 include/llvm/ADT/SparseMultiSet.h             |    2 +-
 include/llvm/ADT/StringExtras.h               |    2 +-
 include/llvm/ADT/StringRef.h                  |    2 +-
 include/llvm/ADT/iterator_range.h             |    2 +-
 include/llvm/Analysis/AliasAnalysis.h         |    4 +-
 include/llvm/Analysis/AssumptionCache.h       |    2 +-
 include/llvm/Analysis/BranchProbabilityInfo.h |   11 +-
 include/llvm/Analysis/ConstantFolding.h       |    6 +-
 include/llvm/Analysis/DemandedBits.h          |    2 +-
 include/llvm/Analysis/InlineCost.h            |    2 +-
 include/llvm/Analysis/InstructionSimplify.h   |    6 +-
 .../llvm/Analysis/LazyBranchProbabilityInfo.h |    9 +-
 include/llvm/Analysis/LazyValueInfo.h         |    7 +-
 include/llvm/Analysis/LoopInfoImpl.h          |    2 +-
 .../llvm/Analysis/MemoryDependenceAnalysis.h  |    2 +-
 include/llvm/Analysis/MemorySSAUpdater.h      |    2 +-
 include/llvm/Analysis/ObjCARCAnalysisUtils.h  |    2 +-
 include/llvm/Analysis/ObjCARCInstKind.h       |    2 +-
 .../Analysis/ScalarEvolutionNormalization.h   |    2 +-
 include/llvm/Analysis/TargetLibraryInfo.h     |    8 +
 include/llvm/Analysis/TargetTransformInfo.h   |   31 +
 .../llvm/Analysis/TargetTransformInfoImpl.h   |   18 +-
 include/llvm/BinaryFormat/COFF.h              |  713 ++++
 .../llvm/{Support => BinaryFormat}/Dwarf.def  |    0
 .../llvm/{Support => BinaryFormat}/Dwarf.h    |   58 +-
 include/llvm/{Support => BinaryFormat}/ELF.h  |   23 +-
 .../ELFRelocs/AArch64.def                     |    0
 .../ELFRelocs/AMDGPU.def                      |    0
 .../ELFRelocs/ARM.def                         |    0
 .../ELFRelocs/AVR.def                         |    0
 .../ELFRelocs/BPF.def                         |    0
 .../ELFRelocs/Hexagon.def                     |    0
 .../ELFRelocs/Lanai.def                       |    0
 .../ELFRelocs/Mips.def                        |    0
 .../ELFRelocs/PowerPC.def                     |    0
 .../ELFRelocs/PowerPC64.def                   |    0
 .../ELFRelocs/RISCV.def                       |    0
 .../ELFRelocs/Sparc.def                       |    0
 .../ELFRelocs/SystemZ.def                     |    0
 .../ELFRelocs/WebAssembly.def                 |    0
 .../ELFRelocs/i386.def                        |    0
 .../ELFRelocs/x86_64.def                      |    0
 .../llvm/{Support => BinaryFormat}/MachO.def  |    0
 include/llvm/BinaryFormat/MachO.h             | 1984 +++++++++
 include/llvm/BinaryFormat/Magic.h             |   73 +
 include/llvm/{Support => BinaryFormat}/Wasm.h |   46 +-
 .../WasmRelocs/WebAssembly.def                |    0
 include/llvm/Bitcode/BitcodeReader.h          |   13 +-
 include/llvm/Bitcode/LLVMBitCodes.h           |    2 +
 include/llvm/CodeGen/BasicTTIImpl.h           |   76 +-
 include/llvm/CodeGen/DFAPacketizer.h          |   37 +-
 include/llvm/CodeGen/DIE.h                    |    2 +-
 include/llvm/CodeGen/ExecutionDepsFix.h       |   42 +-
 include/llvm/CodeGen/FastISel.h               |   39 +-
 include/llvm/CodeGen/FunctionLoweringInfo.h   |   33 +-
 include/llvm/CodeGen/GCMetadata.h             |   38 +-
 include/llvm/CodeGen/GCMetadataPrinter.h      |   25 +-
 include/llvm/CodeGen/GCStrategy.h             |    2 +-
 .../CodeGen/GlobalISel/InstructionSelector.h  |    2 +-
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |    2 +-
 .../CodeGen/GlobalISel/MachineIRBuilder.h     |    2 +-
 include/llvm/CodeGen/LexicalScopes.h          |    3 +-
 include/llvm/CodeGen/LiveInterval.h           |    4 +-
 include/llvm/CodeGen/LiveRegUnits.h           |    2 +-
 include/llvm/CodeGen/MIRParser/MIRParser.h    |   20 +-
 include/llvm/CodeGen/MIRYamlMapping.h         |  140 +-
 include/llvm/CodeGen/MachineBasicBlock.h      |    4 +-
 include/llvm/CodeGen/MachineFunction.h        |    4 +-
 .../llvm/CodeGen/MachineFunctionInitializer.h |   38 -
 include/llvm/CodeGen/MachineFunctionPass.h    |    2 +-
 include/llvm/CodeGen/MachineMemOperand.h      |    2 +-
 include/llvm/CodeGen/MachineModuleInfo.h      |   15 +-
 include/llvm/CodeGen/MachineModuleInfoImpls.h |    4 +-
 include/llvm/CodeGen/MachineOperand.h         |    2 +-
 include/llvm/CodeGen/MachineRegisterInfo.h    |    4 +-
 include/llvm/CodeGen/RegAllocRegistry.h       |   17 +-
 include/llvm/CodeGen/RegisterPressure.h       |   10 +-
 include/llvm/CodeGen/RegisterUsageInfo.h      |   17 +-
 include/llvm/CodeGen/ScheduleDAG.h            |    2 +-
 include/llvm/CodeGen/ScheduleDAGInstrs.h      |    2 +-
 include/llvm/CodeGen/ScheduleDFS.h            |    2 +-
 include/llvm/CodeGen/SchedulerRegistry.h      |   17 +-
 include/llvm/CodeGen/SelectionDAG.h           |   39 +-
 include/llvm/CodeGen/SelectionDAGNodes.h      |    4 +-
 include/llvm/CodeGen/SlotIndexes.h            |    2 +-
 include/llvm/CodeGen/StackProtector.h         |   19 +-
 include/llvm/CodeGen/TailDuplicator.h         |   37 +-
 .../CodeGen/TargetLoweringObjectFileImpl.h    |    8 +-
 include/llvm/CodeGen/TargetPassConfig.h       |   11 +
 include/llvm/CodeGen/TargetSchedule.h         |    3 +-
 include/llvm/CodeGen/VirtRegMap.h             |    9 +-
 include/llvm/Config/abi-breaking.h.cmake      |    3 +
 include/llvm/Config/config.h.cmake            |   21 -
 include/llvm/DebugInfo/CodeView/CVRecord.h    |    6 +-
 include/llvm/DebugInfo/CodeView/CodeView.h    |   18 +
 .../CodeView/DebugChecksumsSubsection.h       |    4 +-
 .../CodeView/DebugCrossExSubsection.h         |   64 +
 .../CodeView/DebugCrossImpSubsection.h        |   88 +
 .../CodeView/DebugInlineeLinesSubsection.h    |    7 +-
 .../DebugInfo/CodeView/DebugLinesSubsection.h |   10 +-
 .../CodeView/DebugStringTableSubsection.h     |    3 +
 .../CodeView/DebugSubsectionRecord.h          |   14 +-
 .../CodeView/DebugSubsectionVisitor.h         |  132 +-
 .../CodeView/DebugSymbolRVASubsection.h       |   59 +
 .../CodeView/DebugSymbolsSubsection.h         |    3 +
 include/llvm/DebugInfo/CodeView/EnumTables.h  |    2 +-
 .../llvm/DebugInfo/CodeView/TypeSerializer.h  |    2 +
 .../DebugInfo/CodeView/TypeTableBuilder.h     |    5 +-
 include/llvm/DebugInfo/DIContext.h            |    2 +
 .../DWARF/DWARFAbbreviationDeclaration.h      |    4 +-
 .../DebugInfo/DWARF/DWARFAcceleratorTable.h   |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFAttribute.h |    2 +-
 .../llvm/DebugInfo/DWARF/DWARFCompileUnit.h   |    6 +-
 include/llvm/DebugInfo/DWARF/DWARFContext.h   |   35 +-
 .../DebugInfo/DWARF/DWARFDebugInfoEntry.h     |    2 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugPubTable.h |    2 +-
 .../DebugInfo/DWARF/DWARFDebugRangeList.h     |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFDie.h       |    7 +-
 include/llvm/DebugInfo/DWARF/DWARFFormValue.h |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h  |    2 +-
 include/llvm/DebugInfo/DWARF/DWARFUnit.h      |   32 +-
 .../llvm/DebugInfo/MSF/MappedBlockStream.h    |    1 -
 .../DebugInfo/PDB/DIA/DIAEnumDebugStreams.h   |    1 +
 .../DebugInfo/PDB/DIA/DIAEnumLineNumbers.h    |    1 +
 .../DebugInfo/PDB/DIA/DIAEnumSourceFiles.h    |    1 +
 .../llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h   |    1 +
 .../PDB/Native/DbiModuleDescriptor.h          |    5 +-
 include/llvm/DebugInfo/PDB/Native/DbiStream.h |    3 +-
 include/llvm/DebugInfo/PDB/PDBSymbol.h        |    4 +
 .../llvm/ExecutionEngine/ExecutionEngine.h    |    2 +-
 .../Orc/CompileOnDemandLayer.h                |    2 +-
 .../llvm/ExecutionEngine/Orc/ExecutionUtils.h |    2 +-
 .../llvm/ExecutionEngine/Orc/IRCompileLayer.h |    2 +-
 .../Orc/RTDyldObjectLinkingLayer.h            |    2 +-
 .../ExecutionEngine/RTDyldMemoryManager.h     |    2 +-
 include/llvm/IR/Attributes.h                  |    4 +-
 include/llvm/IR/BasicBlock.h                  |    4 +-
 include/llvm/IR/CallSite.h                    |    4 +-
 include/llvm/IR/Constants.h                   |    6 +-
 include/llvm/IR/DataLayout.h                  |    2 +-
 include/llvm/IR/DebugInfoMetadata.h           |   11 +-
 include/llvm/IR/DiagnosticInfo.h              |    4 +-
 include/llvm/IR/Dominators.h                  |    6 +
 include/llvm/IR/Function.h                    |    4 +-
 include/llvm/IR/GetElementPtrTypeIterator.h   |    4 +-
 include/llvm/IR/GlobalValue.h                 |    2 +-
 include/llvm/IR/GlobalVariable.h              |    9 +-
 include/llvm/IR/IRBuilder.h                   |   26 +-
 include/llvm/IR/InstrTypes.h                  |    2 +-
 include/llvm/IR/Instruction.h                 |    8 +-
 include/llvm/IR/Instructions.h                |  101 +-
 include/llvm/IR/IntrinsicsAMDGPU.td           |   10 +
 include/llvm/IR/Metadata.h                    |    8 +-
 include/llvm/IR/Module.h                      |    4 +-
 include/llvm/IR/ModuleSummaryIndex.h          |    4 +-
 include/llvm/IR/OperandTraits.h               |    3 -
 include/llvm/IR/PatternMatch.h                |    2 +-
 include/llvm/IR/Statepoint.h                  |    2 +-
 include/llvm/IR/Type.h                        |    2 +-
 include/llvm/IR/Use.h                         |    2 +-
 include/llvm/IR/Value.h                       |    2 +-
 include/llvm/LTO/LTO.h                        |    9 +-
 include/llvm/LinkAllIR.h                      |    2 +-
 include/llvm/LinkAllPasses.h                  |    4 +-
 include/llvm/MC/MCAsmInfo.h                   |    8 +-
 include/llvm/MC/MCAssembler.h                 |    6 +-
 include/llvm/MC/MCCodeView.h                  |    4 +-
 include/llvm/MC/MCContext.h                   |    2 +-
 include/llvm/MC/MCELFObjectWriter.h           |    2 +-
 include/llvm/MC/MCFragment.h                  |    2 +-
 include/llvm/MC/MCMachObjectWriter.h          |    4 +-
 include/llvm/MC/MCObjectFileInfo.h            |    4 +
 include/llvm/MC/MCParser/MCAsmParser.h        |    2 +-
 include/llvm/MC/MCSection.h                   |    2 +-
 include/llvm/MC/MCSectionMachO.h              |    2 +-
 include/llvm/MC/MCSymbolWasm.h                |    2 +-
 include/llvm/MC/MCTargetOptions.h             |    6 +
 include/llvm/MC/MCWasmObjectWriter.h          |   30 +-
 include/llvm/Object/Archive.h                 |    2 +-
 include/llvm/Object/COFF.h                    |    9 +-
 include/llvm/Object/COFFModuleDefinition.h    |    2 +-
 include/llvm/Object/Decompressor.h            |    2 +-
 include/llvm/Object/ELF.h                     |    2 +-
 include/llvm/Object/ELFObjectFile.h           |    2 +-
 include/llvm/Object/ELFTypes.h                |    2 +-
 include/llvm/Object/IRObjectFile.h            |   15 +
 include/llvm/Object/IRSymtab.h                |   15 +-
 include/llvm/Object/MachO.h                   |    6 +-
 include/llvm/Object/MachOUniversal.h          |    2 +-
 include/llvm/Object/ObjectFile.h              |   10 +-
 include/llvm/Object/RelocVisitor.h            |    4 +-
 include/llvm/Object/SymbolicFile.h            |    7 +-
 include/llvm/Object/Wasm.h                    |    2 +-
 include/llvm/Object/WindowsResource.h         |   74 +-
 include/llvm/ObjectYAML/COFFYAML.h            |    2 +-
 .../ObjectYAML/CodeViewYAMLDebugSections.h    |   32 +-
 include/llvm/ObjectYAML/CodeViewYAMLTypes.h   |    5 +
 include/llvm/ObjectYAML/DWARFYAML.h           |   14 +-
 include/llvm/ObjectYAML/ELFYAML.h             |    2 +-
 include/llvm/ObjectYAML/MachOYAML.h           |    8 +-
 include/llvm/ObjectYAML/WasmYAML.h            |    2 +-
 include/llvm/Option/ArgList.h                 |    2 +-
 include/llvm/Pass.h                           |    2 +-
 .../ProfileData/Coverage/CoverageMapping.h    |    6 +-
 .../llvm/Support}/AMDGPUCodeObjectMetadata.h  |    6 +-
 include/llvm/Support/BinaryStreamArray.h      |  262 +-
 include/llvm/Support/BinaryStreamReader.h     |   20 +-
 include/llvm/Support/CBindingWrapping.h       |    2 +-
 include/llvm/Support/COFF.h                   |  724 ----
 include/llvm/Support/Casting.h                |   45 +-
 include/llvm/Support/CommandLine.h            |   77 +-
 include/llvm/Support/ConvertUTF.h             |    2 +-
 include/llvm/Support/DataTypes.h.cmake        |    4 +-
 include/llvm/Support/Endian.h                 |  150 +-
 include/llvm/Support/Error.h                  |   31 +-
 include/llvm/Support/ErrorOr.h                |   18 +-
 include/llvm/Support/FileSystem.h             |   55 -
 include/llvm/Support/FormatVariadic.h         |    2 +-
 include/llvm/Support/GCOV.h                   |    4 +-
 include/llvm/Support/GenericDomTree.h         |    2 +-
 include/llvm/Support/LowLevelTypeImpl.h       |    2 +-
 include/llvm/Support/MachO.h                  | 2038 ----------
 include/llvm/Support/MathExtras.h             |    2 +-
 include/llvm/Support/MemoryBuffer.h           |    4 +-
 include/llvm/Support/Solaris.h                |    2 +-
 include/llvm/Support/SourceMgr.h              |    2 +-
 include/llvm/Support/StringPool.h             |   24 +-
 include/llvm/Support/TargetRegistry.h         |    4 +-
 include/llvm/Support/raw_sha1_ostream.h       |    4 +-
 include/llvm/Support/type_traits.h            |   20 +-
 include/llvm/Target/TargetInstrInfo.h         |    2 +-
 include/llvm/Target/TargetLowering.h          |   69 +
 include/llvm/Target/TargetMachine.h           |   12 +-
 include/llvm/Target/TargetOptions.h           |   10 +-
 include/llvm/Target/TargetSubtargetInfo.h     |    2 +-
 include/llvm/Transforms/IPO/FunctionAttrs.h   |    2 +-
 .../llvm/Transforms/Scalar/GVNExpression.h    |   13 +
 .../llvm/Transforms/Utils/EscapeEnumerator.h  |    2 +-
 .../Transforms/Utils/FunctionComparator.h     |    2 +-
 .../ImportedFunctionsInliningStatistics.h     |    2 +-
 include/llvm/Transforms/Utils/Local.h         |    2 +-
 .../llvm/Transforms/Utils/LoopVersioning.h    |    2 +-
 .../Transforms/Utils/OrderedInstructions.h    |   51 +
 include/llvm/Transforms/Utils/ValueMapper.h   |    2 +-
 .../llvm/Transforms/Vectorize/SLPVectorizer.h |    2 +-
 include/llvm/module.modulemap                 |   44 +-
 lib/Analysis/AliasAnalysisEvaluator.cpp       |    2 +-
 lib/Analysis/AliasSetTracker.cpp              |    2 +-
 lib/Analysis/BranchProbabilityInfo.cpp        |   47 +-
 lib/Analysis/CFLGraph.h                       |    1 -
 lib/Analysis/CallPrinter.cpp                  |    2 +-
 lib/Analysis/CaptureTracking.cpp              |    2 +-
 lib/Analysis/CodeMetrics.cpp                  |    2 +-
 lib/Analysis/ConstantFolding.cpp              |   42 +-
 lib/Analysis/GlobalsModRef.cpp                |   16 +-
 lib/Analysis/InlineCost.cpp                   |    4 +-
 lib/Analysis/InstCount.cpp                    |    3 +-
 lib/Analysis/InstructionSimplify.cpp          |   70 +-
 lib/Analysis/LLVMBuild.txt                    |    2 +-
 lib/Analysis/LazyBranchProbabilityInfo.cpp    |    8 +-
 lib/Analysis/LazyCallGraph.cpp                |    3 +-
 lib/Analysis/LazyValueInfo.cpp                |  194 +-
 lib/Analysis/Lint.cpp                         |    2 +-
 lib/Analysis/MemDepPrinter.cpp                |    2 +-
 lib/Analysis/MemDerefPrinter.cpp              |    4 +-
 lib/Analysis/MemoryDependenceAnalysis.cpp     |    6 +-
 lib/Analysis/MemorySSAUpdater.cpp             |   19 +-
 lib/Analysis/ModuleDebugInfoPrinter.cpp       |    2 +-
 lib/Analysis/ModuleSummaryAnalysis.cpp        |   10 +
 lib/Analysis/ObjCARCInstKind.cpp              |    2 +-
 lib/Analysis/RegionPrinter.cpp                |    4 +-
 lib/Analysis/ScalarEvolution.cpp              |    4 +-
 lib/Analysis/ScalarEvolutionNormalization.cpp |    2 +-
 lib/Analysis/TargetTransformInfo.cpp          |    8 +
 lib/Analysis/ValueTracking.cpp                |   20 +-
 lib/Analysis/VectorUtils.cpp                  |    8 +-
 lib/AsmParser/LLParser.cpp                    |    4 +-
 lib/AsmParser/LLVMBuild.txt                   |    2 +-
 lib/BinaryFormat/CMakeLists.txt               |    8 +
 lib/{Support => BinaryFormat}/Dwarf.cpp       |  200 +-
 lib/BinaryFormat/LLVMBuild.txt                |   22 +
 lib/BinaryFormat/Magic.cpp                    |  216 +
 lib/Bitcode/Reader/BitcodeReader.cpp          |   26 +-
 lib/Bitcode/Reader/MetadataLoader.cpp         |    2 +-
 lib/Bitcode/Writer/BitcodeWriter.cpp          |   10 +-
 lib/CMakeLists.txt                            |    1 +
 lib/CodeGen/Analysis.cpp                      |    2 +-
 lib/CodeGen/AsmPrinter/ARMException.cpp       |    2 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp         |   10 +-
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp    |    2 +-
 .../AsmPrinter/AsmPrinterInlineAsm.cpp        |    2 +-
 lib/CodeGen/AsmPrinter/CodeViewDebug.cpp      |   72 +-
 lib/CodeGen/AsmPrinter/CodeViewDebug.h        |   51 +-
 lib/CodeGen/AsmPrinter/DIEHash.cpp            |    4 +-
 lib/CodeGen/AsmPrinter/DebugLocStream.h       |    2 +-
 lib/CodeGen/AsmPrinter/DwarfAccelTable.h      |    2 +-
 lib/CodeGen/AsmPrinter/DwarfCFIException.cpp  |    2 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h     |    2 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp         |    2 +-
 lib/CodeGen/AsmPrinter/DwarfExpression.cpp    |    2 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.cpp          |    4 +-
 lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp    |    4 +-
 lib/CodeGen/AsmPrinter/LLVMBuild.txt          |    2 +-
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp     |   23 +-
 lib/CodeGen/AsmPrinter/WinException.cpp       |    4 +-
 lib/CodeGen/BasicTargetTransformInfo.cpp      |    2 +-
 lib/CodeGen/BranchFolding.cpp                 |   36 +-
 lib/CodeGen/BranchRelaxation.cpp              |    6 +-
 lib/CodeGen/BuiltinGCs.cpp                    |    2 +-
 lib/CodeGen/CalcSpillWeights.cpp              |    2 +-
 lib/CodeGen/CodeGen.cpp                       |    2 +-
 lib/CodeGen/CodeGenPrepare.cpp                |  302 +-
 lib/CodeGen/DFAPacketizer.cpp                 |   85 +-
 lib/CodeGen/DeadMachineInstructionElim.cpp    |    2 +-
 lib/CodeGen/DwarfEHPrepare.cpp                |    4 +-
 lib/CodeGen/ExpandISelPseudos.cpp             |    2 +-
 lib/CodeGen/ExpandPostRAPseudos.cpp           |    2 +-
 lib/CodeGen/ExpandReductions.cpp              |    6 +-
 lib/CodeGen/FaultMaps.cpp                     |    2 +-
 lib/CodeGen/FuncletLayout.cpp                 |    2 +-
 lib/CodeGen/GCMetadata.cpp                    |   16 +-
 lib/CodeGen/GCMetadataPrinter.cpp             |    7 +-
 lib/CodeGen/GlobalISel/IRTranslator.cpp       |   19 +-
 lib/CodeGen/GlobalISel/Legalizer.cpp          |    1 -
 lib/CodeGen/GlobalMerge.cpp                   |    3 +-
 lib/CodeGen/IfConversion.cpp                  |    2 +-
 lib/CodeGen/ImplicitNullChecks.cpp            |   10 +-
 lib/CodeGen/InlineSpiller.cpp                 |   31 +-
 lib/CodeGen/LLVMTargetMachine.cpp             |  141 +-
 lib/CodeGen/LexicalScopes.cpp                 |    2 +-
 lib/CodeGen/LiveIntervalAnalysis.cpp          |    4 +-
 lib/CodeGen/LiveIntervalUnion.cpp             |    6 +-
 lib/CodeGen/LiveRegMatrix.cpp                 |    6 +-
 lib/CodeGen/LocalStackSlotAllocation.cpp      |    2 +-
 lib/CodeGen/MIRParser/MILexer.h               |    2 +-
 lib/CodeGen/MIRParser/MIParser.cpp            |   44 +-
 lib/CodeGen/MIRParser/MIParser.h              |   16 +-
 lib/CodeGen/MIRParser/MIRParser.cpp           |  144 +-
 lib/CodeGen/MIRPrinter.cpp                    |   65 +-
 lib/CodeGen/MIRPrintingPass.cpp               |    4 +-
 lib/CodeGen/MachineBlockPlacement.cpp         |    4 +-
 lib/CodeGen/MachineCSE.cpp                    |    2 +-
 lib/CodeGen/MachineCopyPropagation.cpp        |    2 +-
 lib/CodeGen/MachineDominanceFrontier.cpp      |    1 -
 lib/CodeGen/MachineDominators.cpp             |    2 +-
 lib/CodeGen/MachineFunction.cpp               |    3 -
 lib/CodeGen/MachineFunctionPass.cpp           |    2 +-
 lib/CodeGen/MachineFunctionPrinterPass.cpp    |    2 +-
 lib/CodeGen/MachineInstr.cpp                  |    6 +-
 lib/CodeGen/MachineLICM.cpp                   |    2 +-
 lib/CodeGen/MachineModuleInfo.cpp             |   16 +-
 lib/CodeGen/MachineOutliner.cpp               |    6 +-
 lib/CodeGen/MachinePipeliner.cpp              |    2 +-
 lib/CodeGen/MachineRegionInfo.cpp             |    2 +-
 lib/CodeGen/MachineRegisterInfo.cpp           |    2 +-
 lib/CodeGen/MachineScheduler.cpp              |    8 +-
 lib/CodeGen/MachineSink.cpp                   |    2 +-
 lib/CodeGen/MachineTraceMetrics.cpp           |    2 +-
 lib/CodeGen/MachineVerifier.cpp               |   11 +-
 lib/CodeGen/OptimizePHIs.cpp                  |    2 +-
 lib/CodeGen/PatchableFunction.cpp             |    2 +-
 lib/CodeGen/PeepholeOptimizer.cpp             |    2 +-
 lib/CodeGen/PostRAHazardRecognizer.cpp        |    2 +-
 lib/CodeGen/RegAllocBase.cpp                  |    5 +-
 lib/CodeGen/RegAllocBasic.cpp                 |    2 +-
 lib/CodeGen/RegAllocGreedy.cpp                |   96 +-
 lib/CodeGen/RegAllocPBQP.cpp                  |   16 +-
 lib/CodeGen/RegisterClassInfo.cpp             |    2 +-
 lib/CodeGen/RegisterPressure.cpp              |    4 +-
 lib/CodeGen/RegisterUsageInfo.cpp             |   15 +-
 lib/CodeGen/RenameIndependentSubregs.cpp      |    9 +-
 lib/CodeGen/ResetMachineFunctionPass.cpp      |    2 +-
 lib/CodeGen/ScheduleDAG.cpp                   |    8 +-
 lib/CodeGen/ScheduleDAGInstrs.cpp             |    6 +-
 lib/CodeGen/ScheduleDAGPrinter.cpp            |    2 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp      |   32 +-
 lib/CodeGen/SelectionDAG/FastISel.cpp         |    2 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp      |    8 +
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp  |    2 +-
 .../SelectionDAG/ScheduleDAGRRList.cpp        |    2 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp  |    2 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp     |   19 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  249 +-
 .../SelectionDAG/SelectionDAGBuilder.h        |   14 +-
 .../SelectionDAG/SelectionDAGDumper.cpp       |    2 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp |   28 +-
 .../SelectionDAG/SelectionDAGPrinter.cpp      |    2 +-
 .../SelectionDAG/StatepointLowering.cpp       |    4 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp   |    6 +-
 lib/CodeGen/ShadowStackGCLowering.cpp         |    2 +-
 lib/CodeGen/SjLjEHPrepare.cpp                 |    2 +-
 lib/CodeGen/StackMaps.cpp                     |    2 +-
 lib/CodeGen/StackProtector.cpp                |    8 +
 lib/CodeGen/StackSlotColoring.cpp             |    2 +-
 lib/CodeGen/TailDuplication.cpp               |   14 +-
 lib/CodeGen/TailDuplicator.cpp                |   38 +-
 lib/CodeGen/TargetFrameLoweringImpl.cpp       |   14 +-
 lib/CodeGen/TargetLoweringBase.cpp            |    8 +-
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp  |  122 +-
 lib/CodeGen/TargetOptionsImpl.cpp             |    4 +-
 lib/CodeGen/TargetPassConfig.cpp              |   71 +
 lib/CodeGen/TargetRegisterInfo.cpp            |    2 +-
 lib/CodeGen/TargetSchedule.cpp                |   24 +-
 lib/CodeGen/TargetSubtargetInfo.cpp           |    2 +-
 lib/CodeGen/VirtRegMap.cpp                    |   11 +
 lib/CodeGen/WinEHPrepare.cpp                  |    2 +-
 lib/CodeGen/XRayInstrumentation.cpp           |   30 +-
 lib/DebugInfo/CodeView/CMakeLists.txt         |   11 +-
 .../CodeView/DebugChecksumsSubsection.cpp     |    4 +-
 .../CodeView/DebugCrossExSubsection.cpp       |   51 +
 .../CodeView/DebugCrossImpSubsection.cpp      |   91 +
 .../CodeView/DebugInlineeLinesSubsection.cpp  |    9 +-
 .../CodeView/DebugLinesSubsection.cpp         |   11 +-
 .../CodeView/DebugStringTableSubsection.cpp   |   14 +-
 .../CodeView/DebugSubsectionRecord.cpp        |   28 +-
 .../CodeView/DebugSubsectionVisitor.cpp       |   84 +-
 .../CodeView/DebugSymbolRVASubsection.cpp     |   31 +
 .../CodeView/TypeTableCollection.cpp          |    3 +-
 .../DWARF/DWARFAbbreviationDeclaration.cpp    |    5 +-
 lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp |    5 +-
 lib/DebugInfo/DWARF/DWARFCompileUnit.cpp      |    4 +-
 lib/DebugInfo/DWARF/DWARFContext.cpp          |  149 +-
 lib/DebugInfo/DWARF/DWARFDebugFrame.cpp       |    8 +-
 lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp   |    2 +-
 lib/DebugInfo/DWARF/DWARFDebugLine.cpp        |    2 +-
 lib/DebugInfo/DWARF/DWARFDebugLoc.cpp         |    6 +-
 lib/DebugInfo/DWARF/DWARFDebugMacro.cpp       |    4 +-
 lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp    |    4 +-
 lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp   |    2 +-
 lib/DebugInfo/DWARF/DWARFDie.cpp              |   41 +-
 lib/DebugInfo/DWARF/DWARFFormValue.cpp        |   10 +-
 lib/DebugInfo/DWARF/DWARFGdbIndex.cpp         |    2 +-
 lib/DebugInfo/DWARF/DWARFTypeUnit.cpp         |    2 +-
 lib/DebugInfo/DWARF/DWARFUnit.cpp             |   60 +-
 lib/DebugInfo/DWARF/DWARFUnitIndex.cpp        |    4 +-
 lib/DebugInfo/DWARF/LLVMBuild.txt             |    2 +-
 lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp |    4 +-
 lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp  |    2 +-
 lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp  |    2 +-
 lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp      |    2 +-
 lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp        |    7 +-
 .../PDB/Native/DbiModuleDescriptorBuilder.cpp |   16 +-
 lib/DebugInfo/PDB/Native/DbiStream.cpp        |    6 +-
 lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp |    2 +-
 .../PDB/Native/ModuleDebugStream.cpp          |    4 +-
 lib/DebugInfo/PDB/Native/PublicsStream.cpp    |   10 +-
 lib/DebugInfo/PDB/PDBContext.cpp              |    2 +-
 lib/DebugInfo/PDB/PDBSymbolBlock.cpp          |    2 +-
 .../PDB/PDBSymbolCompilandDetails.cpp         |    2 +-
 lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp   |    2 +-
 lib/DebugInfo/PDB/PDBSymbolCustom.cpp         |    2 +-
 lib/DebugInfo/PDB/PDBSymbolFunc.cpp           |    2 +-
 lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp   |    2 +-
 lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp |    2 +-
 lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp   |    2 +-
 lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp  |    2 +-
 lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp     |    2 +-
 lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp  |    2 +-
 lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp     |    2 +-
 .../PDB/PDBSymbolTypeFunctionSig.cpp          |    2 +-
 lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp    |    2 +-
 .../PDB/PDBSymbolTypeVTableShape.cpp          |    2 +-
 lib/DebugInfo/PDB/PDBSymbolUnknown.cpp        |    2 +-
 lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp |    2 +-
 .../Symbolize/SymbolizableObjectFile.cpp      |    2 +-
 lib/DebugInfo/Symbolize/Symbolize.cpp         |    4 +-
 .../IntelJITEvents/IntelJITEventListener.cpp  |    2 +-
 .../IntelJITEvents/jitprofiling.c             |    2 +-
 .../Interpreter/ExternalFunctions.cpp         |    4 +-
 .../OProfileJIT/OProfileJITEventListener.cpp  |    2 +-
 lib/ExecutionEngine/Orc/IndirectionUtils.cpp  |    2 +-
 lib/ExecutionEngine/Orc/OrcMCJITReplacement.h |    4 +-
 .../RuntimeDyld/RuntimeDyld.cpp               |    4 +-
 .../RuntimeDyld/RuntimeDyldELF.cpp            |    2 +-
 .../RuntimeDyld/RuntimeDyldImpl.h             |    2 +-
 .../RuntimeDyld/Targets/RuntimeDyldCOFFI386.h |    4 +-
 .../Targets/RuntimeDyldCOFFThumb.h            |    4 +-
 .../Targets/RuntimeDyldCOFFX86_64.h           |    4 +-
 .../Targets/RuntimeDyldELFMips.cpp            |    2 +-
 lib/ExecutionEngine/SectionMemoryManager.cpp  |    2 +-
 lib/Fuzzer/FuzzerDriver.cpp                   |    4 +-
 lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp     |    2 +
 lib/Fuzzer/FuzzerLoop.cpp                     |    2 +-
 lib/Fuzzer/FuzzerMerge.cpp                    |    4 +-
 lib/Fuzzer/FuzzerMutate.cpp                   |    2 +-
 lib/Fuzzer/FuzzerShmemPosix.cpp               |    6 +-
 lib/Fuzzer/FuzzerShmemWindows.cpp             |    4 +-
 lib/Fuzzer/FuzzerTracePC.cpp                  |    2 +-
 lib/Fuzzer/FuzzerTraceState.cpp               |    2 +-
 lib/Fuzzer/FuzzerUtilWindows.cpp              |    2 +
 lib/Fuzzer/afl/afl_driver.cpp                 |   14 +-
 lib/Fuzzer/test/AFLDriverTest.cpp             |    2 +-
 lib/Fuzzer/test/AbsNegAndConstant64Test.cpp   |    6 +-
 lib/Fuzzer/test/AbsNegAndConstantTest.cpp     |    6 +-
 lib/Fuzzer/test/AccumulateAllocationsTest.cpp |    2 +-
 lib/Fuzzer/test/BadStrcmpTest.cpp             |    2 +-
 lib/Fuzzer/test/BufferOverflowOnInput.cpp     |    2 +-
 lib/Fuzzer/test/CallerCalleeTest.cpp          |    2 +-
 lib/Fuzzer/test/CleanseTest.cpp               |    2 +-
 lib/Fuzzer/test/CustomMutatorTest.cpp         |    2 +-
 lib/Fuzzer/test/CxxStringEqTest.cpp           |    4 +-
 lib/Fuzzer/test/DSOTestMain.cpp               |    2 +-
 lib/Fuzzer/test/DivTest.cpp                   |    2 +-
 .../test/FourIndependentBranchesTest.cpp      |    2 +-
 lib/Fuzzer/test/FullCoverageSetTest.cpp       |    2 +-
 lib/Fuzzer/test/FuzzerUnittest.cpp            |    4 +-
 lib/Fuzzer/test/LeakTest.cpp                  |    2 +-
 lib/Fuzzer/test/LeakTimeoutTest.cpp           |    2 +-
 lib/Fuzzer/test/LoadTest.cpp                  |    2 +-
 lib/Fuzzer/test/Memcmp64BytesTest.cpp         |    2 +-
 lib/Fuzzer/test/MemcmpTest.cpp                |    2 +-
 lib/Fuzzer/test/NotinstrumentedTest.cpp       |    2 +-
 lib/Fuzzer/test/NthRunCrashTest.cpp           |    2 +-
 lib/Fuzzer/test/NullDerefOnEmptyTest.cpp      |    2 +-
 lib/Fuzzer/test/NullDerefTest.cpp             |    2 +-
 lib/Fuzzer/test/OneHugeAllocTest.cpp          |    2 +-
 .../test/OutOfMemorySingleLargeMallocTest.cpp |    2 +-
 lib/Fuzzer/test/OutOfMemoryTest.cpp           |    2 +-
 lib/Fuzzer/test/RepeatedBytesTest.cpp         |    2 +-
 lib/Fuzzer/test/RepeatedMemcmp.cpp            |    3 +-
 lib/Fuzzer/test/ShrinkControlFlowTest.cpp     |    6 +-
 lib/Fuzzer/test/ShrinkValueProfileTest.cpp    |    6 +-
 lib/Fuzzer/test/SignedIntOverflowTest.cpp     |    4 +-
 lib/Fuzzer/test/SimpleCmpTest.cpp             |    2 +-
 lib/Fuzzer/test/SimpleDictionaryTest.cpp      |    2 +-
 lib/Fuzzer/test/SimpleHashTest.cpp            |    2 +-
 lib/Fuzzer/test/SimpleTest.cpp                |    2 +-
 lib/Fuzzer/test/SimpleThreadedTest.cpp        |    2 +-
 lib/Fuzzer/test/SingleByteInputTest.cpp       |    4 +-
 lib/Fuzzer/test/SingleMemcmpTest.cpp          |    2 +-
 lib/Fuzzer/test/SingleStrcmpTest.cpp          |    2 +-
 lib/Fuzzer/test/SingleStrncmpTest.cpp         |    2 +-
 lib/Fuzzer/test/SpamyTest.cpp                 |    2 +-
 lib/Fuzzer/test/StrcmpTest.cpp                |    4 +-
 lib/Fuzzer/test/StrncmpOOBTest.cpp            |    6 +-
 lib/Fuzzer/test/StrncmpTest.cpp               |    2 +-
 lib/Fuzzer/test/StrstrTest.cpp                |    4 +-
 lib/Fuzzer/test/SwapCmpTest.cpp               |    2 +-
 lib/Fuzzer/test/Switch2Test.cpp               |    8 +-
 lib/Fuzzer/test/SwitchTest.cpp                |    8 +-
 lib/Fuzzer/test/TableLookupTest.cpp           |    4 +-
 lib/Fuzzer/test/ThreadedLeakTest.cpp          |    2 +-
 lib/Fuzzer/test/ThreadedTest.cpp              |    2 +-
 lib/Fuzzer/test/TimeoutEmptyTest.cpp          |    2 +-
 lib/Fuzzer/test/TimeoutTest.cpp               |    2 +-
 lib/Fuzzer/test/TraceMallocTest.cpp           |    2 +-
 lib/Fuzzer/test/TwoDifferentBugsTest.cpp      |    2 +-
 lib/IR/AsmWriter.cpp                          |    2 +-
 lib/IR/Attributes.cpp                         |    4 +-
 lib/IR/Comdat.cpp                             |    2 +-
 lib/IR/ConstantRange.cpp                      |   40 +-
 lib/IR/Constants.cpp                          |   13 +-
 lib/IR/Core.cpp                               |   12 +
 lib/IR/DIBuilder.cpp                          |    4 +-
 lib/IR/DataLayout.cpp                         |    2 +-
 lib/IR/DebugInfo.cpp                          |    2 +-
 lib/IR/DebugLoc.cpp                           |    2 +-
 lib/IR/DiagnosticInfo.cpp                     |    6 +-
 lib/IR/DiagnosticPrinter.cpp                  |    4 +-
 lib/IR/Dominators.cpp                         |   20 +-
 lib/IR/Function.cpp                           |    4 +-
 lib/IR/Globals.cpp                            |    8 +-
 lib/IR/IRBuilder.cpp                          |   34 +-
 lib/IR/InlineAsm.cpp                          |    2 +-
 lib/IR/Instruction.cpp                        |   10 +-
 lib/IR/Instructions.cpp                       |  135 +-
 lib/IR/IntrinsicInst.cpp                      |    2 +-
 lib/IR/LLVMBuild.txt                          |    2 +-
 lib/IR/LLVMContext.cpp                        |    2 +-
 lib/IR/LLVMContextImpl.h                      |    2 +-
 lib/IR/LegacyPassManager.cpp                  |    2 +-
 lib/IR/Metadata.cpp                           |    4 +-
 lib/IR/Module.cpp                             |    6 +-
 lib/IR/OptBisect.cpp                          |    2 +-
 lib/IR/Type.cpp                               |    2 +-
 lib/IR/TypeFinder.cpp                         |    2 +-
 lib/IR/ValueSymbolTable.cpp                   |    2 +-
 lib/IR/Verifier.cpp                           |    6 +-
 lib/LLVMBuild.txt                             |    1 +
 lib/LTO/LTO.cpp                               |   74 +-
 lib/LTO/ThinLTOCodeGenerator.cpp              |    2 +-
 lib/MC/ELFObjectWriter.cpp                    |   20 +-
 lib/MC/MCAsmBackend.cpp                       |    2 +-
 lib/MC/MCAsmInfo.cpp                          |    2 +-
 lib/MC/MCAsmInfoDarwin.cpp                    |    2 +-
 lib/MC/MCAsmInfoELF.cpp                       |    2 +-
 lib/MC/MCAssembler.cpp                        |    4 +-
 lib/MC/MCCodeView.cpp                         |    2 +-
 lib/MC/MCContext.cpp                          |    8 +-
 lib/MC/MCDisassembler/Disassembler.cpp        |    2 +-
 lib/MC/MCDisassembler/MCRelocationInfo.cpp    |    2 +-
 lib/MC/MCDwarf.cpp                            |    6 +-
 lib/MC/MCELFStreamer.cpp                      |    6 +-
 lib/MC/MCExpr.cpp                             |    6 +-
 lib/MC/MCFragment.cpp                         |    4 +-
 lib/MC/MCInstPrinter.cpp                      |    2 +-
 lib/MC/MCInstrAnalysis.cpp                    |    2 +-
 lib/MC/MCMachOStreamer.cpp                    |    2 +-
 lib/MC/MCNullStreamer.cpp                     |    2 +-
 lib/MC/MCObjectFileInfo.cpp                   |   20 +-
 lib/MC/MCObjectWriter.cpp                     |    6 +-
 lib/MC/MCParser/AsmLexer.cpp                  |    2 +-
 lib/MC/MCParser/AsmParser.cpp                 |    8 +-
 lib/MC/MCParser/COFFAsmParser.cpp             |    4 +-
 lib/MC/MCParser/DarwinAsmParser.cpp           |    6 +-
 lib/MC/MCParser/ELFAsmParser.cpp              |    4 +-
 lib/MC/MCParser/MCAsmLexer.cpp                |    2 +-
 lib/MC/MCParser/MCAsmParser.cpp               |    2 +-
 lib/MC/MCParser/MCTargetAsmParser.cpp         |    2 +-
 lib/MC/MCRegisterInfo.cpp                     |    2 +-
 lib/MC/MCSection.cpp                          |    2 +-
 lib/MC/MCSectionCOFF.cpp                      |    2 +-
 lib/MC/MCSectionELF.cpp                       |    4 +-
 lib/MC/MCStreamer.cpp                         |    6 +-
 lib/MC/MCSubtargetInfo.cpp                    |    2 +-
 lib/MC/MCSymbol.cpp                           |    2 +-
 lib/MC/MCSymbolELF.cpp                        |    4 +-
 lib/MC/MCTargetOptions.cpp                    |    2 +-
 lib/MC/MCWasmObjectTargetWriter.cpp           |   10 +-
 lib/MC/MCWinEH.cpp                            |    4 +-
 lib/MC/MachObjectWriter.cpp                   |    4 +-
 lib/MC/StringTableBuilder.cpp                 |    4 +-
 lib/MC/SubtargetFeature.cpp                   |    2 +-
 lib/MC/WasmObjectWriter.cpp                   |  271 +-
 lib/MC/WinCOFFObjectWriter.cpp                |    6 +-
 lib/MC/WinCOFFStreamer.cpp                    |    4 +-
 lib/Object/Archive.cpp                        |    2 +-
 lib/Object/ArchiveWriter.cpp                  |    3 +-
 lib/Object/Binary.cpp                         |   71 +-
 lib/Object/COFFImportFile.cpp                 |   14 +-
 lib/Object/COFFObjectFile.cpp                 |    2 +-
 lib/Object/Decompressor.cpp                   |    2 +-
 lib/Object/ELF.cpp                            |   34 +-
 lib/Object/ELFObjectFile.cpp                  |    6 +-
 lib/Object/IRObjectFile.cpp                   |   33 +-
 lib/Object/IRSymtab.cpp                       |   47 +-
 lib/Object/LLVMBuild.txt                      |    2 +-
 lib/Object/MachOObjectFile.cpp                |   12 +-
 lib/Object/ModuleSymbolTable.cpp              |    6 +-
 lib/Object/Object.cpp                         |    2 +-
 lib/Object/ObjectFile.cpp                     |   61 +-
 lib/Object/SymbolicFile.cpp                   |   64 +-
 lib/Object/WasmObjectFile.cpp                 |    4 +-
 lib/Object/WindowsResource.cpp                |  518 ++-
 lib/ObjectYAML/CodeViewYAMLDebugSections.cpp  |  551 ++-
 lib/ObjectYAML/CodeViewYAMLSymbols.cpp        |   13 +
 lib/ObjectYAML/CodeViewYAMLTypes.cpp          |   28 +-
 lib/ObjectYAML/DWARFEmitter.cpp               |    2 +-
 lib/ObjectYAML/DWARFVisitor.h                 |    2 +-
 lib/ObjectYAML/ELFYAML.cpp                    |   26 +-
 lib/ObjectYAML/MachOYAML.cpp                  |    4 +-
 lib/ObjectYAML/ObjectYAML.cpp                 |    2 +-
 lib/ObjectYAML/WasmYAML.cpp                   |    2 +-
 lib/Option/Arg.cpp                            |    2 +-
 lib/Passes/PassBuilder.cpp                    |   10 +
 lib/ProfileData/Coverage/CoverageMapping.cpp  |    2 +-
 .../Coverage/CoverageMappingWriter.cpp        |    2 +-
 lib/ProfileData/InstrProf.cpp                 |    4 +-
 lib/ProfileData/InstrProfReader.cpp           |    2 +-
 lib/ProfileData/InstrProfWriter.cpp           |    2 +-
 lib/ProfileData/SampleProfWriter.cpp          |    2 +-
 lib/Support/AMDGPUCodeObjectMetadata.cpp      |  218 +
 lib/Support/ARMAttributeParser.cpp            |    2 +-
 lib/Support/ARMBuildAttrs.cpp                 |    2 +-
 lib/Support/Atomic.cpp                        |    2 +
 lib/Support/CMakeLists.txt                    |    2 +-
 lib/Support/CommandLine.cpp                   |   18 +-
 lib/Support/ConvertUTF.cpp                    |    2 -
 lib/Support/ConvertUTFWrapper.cpp             |    2 +-
 lib/Support/Errno.cpp                         |    2 +-
 lib/Support/Error.cpp                         |    1 -
 lib/Support/FormattedStream.cpp               |    2 +-
 lib/Support/LockFileManager.cpp               |    6 +-
 lib/Support/MD5.cpp                           |    2 +-
 lib/Support/Mutex.cpp                         |    2 +-
 lib/Support/Path.cpp                          |  176 +-
 lib/Support/PrettyStackTrace.cpp              |    2 +-
 lib/Support/Process.cpp                       |    2 +-
 lib/Support/RWMutex.cpp                       |    2 +-
 lib/Support/SHA1.cpp                          |    2 +-
 lib/Support/Signals.cpp                       |   18 +-
 lib/Support/SourceMgr.cpp                     |    6 +-
 lib/Support/SpecialCaseList.cpp               |    2 +-
 lib/Support/Statistic.cpp                     |    2 +-
 lib/Support/StringExtras.cpp                  |    2 +-
 lib/Support/TargetRegistry.cpp                |    3 +-
 lib/Support/ThreadLocal.cpp                   |    2 +-
 lib/Support/Timer.cpp                         |    2 +-
 lib/Support/TrigramIndex.cpp                  |    2 +-
 lib/Support/Triple.cpp                        |   21 +-
 lib/Support/Unix/DynamicLibrary.inc           |    7 +-
 lib/Support/Unix/Path.inc                     |    2 +-
 lib/Support/Unix/Signals.inc                  |    2 +-
 lib/Support/Unix/Threading.inc                |    8 +-
 lib/Support/Windows/DynamicLibrary.inc        |    2 +-
 lib/Support/Windows/WindowsSupport.h          |    4 +-
 lib/Support/YAMLParser.cpp                    |    2 +-
 lib/TableGen/StringMatcher.cpp                |    2 +-
 lib/Target/AArch64/AArch64AsmPrinter.cpp      |    6 +-
 .../AArch64DeadRegisterDefinitionsPass.cpp    |    2 +-
 .../AArch64/AArch64ExpandPseudoInsts.cpp      |    2 +-
 lib/Target/AArch64/AArch64FastISel.cpp        |    6 +-
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp    |    2 +-
 lib/Target/AArch64/AArch64ISelLowering.cpp    |    6 +-
 lib/Target/AArch64/AArch64InstrInfo.cpp       |    2 +-
 lib/Target/AArch64/AArch64LegalizerInfo.cpp   |    4 +-
 .../AArch64/AArch64LoadStoreOptimizer.cpp     |    2 +-
 lib/Target/AArch64/AArch64PBQPRegAlloc.cpp    |    2 +-
 .../AArch64/AArch64RegisterBankInfo.cpp       |    4 +-
 .../AArch64/AArch64TargetObjectFile.cpp       |    2 +-
 .../AArch64/AArch64TargetTransformInfo.cpp    |    2 +-
 .../AArch64/AsmParser/AArch64AsmParser.cpp    |    2 +-
 .../MCTargetDesc/AArch64AsmBackend.cpp        |    4 +-
 .../MCTargetDesc/AArch64ELFObjectWriter.cpp   |    2 +-
 .../MCTargetDesc/AArch64ELFStreamer.cpp       |    2 +-
 .../MCTargetDesc/AArch64MachObjectWriter.cpp  |    2 +-
 lib/Target/AMDGPU/AMDGPU.td                   |   33 +-
 lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp     |    6 +-
 .../AMDGPU/AMDGPUAnnotateUniformValues.cpp    |    2 +-
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp        |   12 +-
 lib/Target/AMDGPU/AMDGPUAsmPrinter.h          |    2 +-
 lib/Target/AMDGPU/AMDGPUCallLowering.cpp      |    2 +-
 lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    |    4 +-
 lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp      |    6 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp      |    2 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.h           |    2 +-
 lib/Target/AMDGPU/AMDGPUInstructionSelector.h |    2 +-
 lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp     |   10 +-
 lib/Target/AMDGPU/AMDGPUMCInstLower.cpp       |    1 -
 .../AMDGPU/AMDGPUMachineCFGStructurizer.cpp   |    2 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.h     |    2 +-
 lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp     |   20 +-
 lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp   |  353 ++
 lib/Target/AMDGPU/AMDGPURegisterBankInfo.h    |    1 -
 lib/Target/AMDGPU/AMDGPURegisterInfo.cpp      |    1 -
 lib/Target/AMDGPU/AMDGPUSubtarget.h           |   17 +-
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp     |   18 +-
 lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp  |    6 +-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp      |    2 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  106 +-
 lib/Target/AMDGPU/CMakeLists.txt              |    1 +
 .../Disassembler/AMDGPUDisassembler.cpp       |    7 +-
 .../AMDGPU/Disassembler/AMDGPUDisassembler.h  |    2 +-
 lib/Target/AMDGPU/GCNHazardRecognizer.cpp     |    2 +-
 .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp  |    2 +-
 .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp  |    2 +-
 .../AMDGPUCodeObjectMetadataStreamer.cpp      |  197 +-
 .../AMDGPUCodeObjectMetadataStreamer.h        |    2 +-
 .../MCTargetDesc/AMDGPUELFObjectWriter.cpp    |    2 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |   32 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.h       |   12 -
 .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp |    2 +-
 lib/Target/AMDGPU/Processors.td               |   81 +-
 .../AMDGPU/R600ControlFlowFinalizer.cpp       |    4 +-
 lib/Target/AMDGPU/R600EmitClauseMarkers.cpp   |    2 +-
 lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp |    2 +-
 lib/Target/AMDGPU/R600FrameLowering.cpp       |    2 +-
 lib/Target/AMDGPU/R600ISelLowering.cpp        |    2 +-
 lib/Target/AMDGPU/R600InstrInfo.cpp           |    4 +-
 lib/Target/AMDGPU/R600MachineScheduler.cpp    |    4 +-
 lib/Target/AMDGPU/R600Packetizer.cpp          |    2 +-
 lib/Target/AMDGPU/SIDebuggerInsertNops.cpp    |    2 +-
 lib/Target/AMDGPU/SIFixSGPRCopies.cpp         |    2 +-
 lib/Target/AMDGPU/SIFoldOperands.cpp          |    3 +-
 lib/Target/AMDGPU/SIFrameLowering.cpp         |    2 +-
 lib/Target/AMDGPU/SIISelLowering.cpp          |    6 +-
 lib/Target/AMDGPU/SIInsertWaitcnts.cpp        |    2 +-
 lib/Target/AMDGPU/SIInstrInfo.cpp             |    2 +-
 lib/Target/AMDGPU/SILowerControlFlow.cpp      |    2 +-
 lib/Target/AMDGPU/SILowerI1Copies.cpp         |    2 +-
 lib/Target/AMDGPU/SIMachineFunctionInfo.h     |    2 +-
 lib/Target/AMDGPU/SIMachineScheduler.cpp      |    2 +-
 lib/Target/AMDGPU/SIPeepholeSDWA.cpp          |  103 +-
 lib/Target/AMDGPU/SIRegisterInfo.cpp          |   62 +-
 lib/Target/AMDGPU/SIRegisterInfo.h            |    4 +-
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   43 +-
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      |    7 -
 lib/Target/AMDGPU/VOP3Instructions.td         |    9 +-
 lib/Target/ARM/ARMAsmPrinter.cpp              |    4 +-
 lib/Target/ARM/ARMBaseInstrInfo.cpp           |    2 +-
 lib/Target/ARM/ARMBaseRegisterInfo.cpp        |    4 +-
 lib/Target/ARM/ARMCallLowering.cpp            |   16 +-
 lib/Target/ARM/ARMConstantIslandPass.cpp      |    2 +-
 lib/Target/ARM/ARMFastISel.cpp                |    2 +-
 lib/Target/ARM/ARMFrameLowering.cpp           |    2 +-
 lib/Target/ARM/ARMISelLowering.cpp            |    9 +-
 lib/Target/ARM/ARMInstrVFP.td                 |    9 +-
 lib/Target/ARM/ARMInstructionSelector.cpp     |   68 +-
 lib/Target/ARM/ARMLegalizerInfo.cpp           |    2 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp      |    2 +-
 lib/Target/ARM/ARMMCInstLower.cpp             |    2 +-
 lib/Target/ARM/ARMRegisterBankInfo.cpp        |   33 +-
 lib/Target/ARM/ARMSubtarget.cpp               |    4 +-
 lib/Target/ARM/ARMTargetMachine.cpp           |   15 +
 lib/Target/ARM/ARMTargetObjectFile.cpp        |    6 +-
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp     |   19 +-
 .../ARM/Disassembler/ARMDisassembler.cpp      |    2 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp |   66 +-
 .../ARM/MCTargetDesc/ARMAsmBackendDarwin.h    |    2 +-
 .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp   |    2 +-
 .../ARM/MCTargetDesc/ARMELFStreamer.cpp       |    4 +-
 lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h   |    3 +
 .../ARM/MCTargetDesc/ARMMCCodeEmitter.cpp     |   12 +-
 .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp      |    2 +-
 .../MCTargetDesc/ARMMachORelocationInfo.cpp   |    2 +-
 .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp  |    4 +-
 .../MCTargetDesc/ARMWinCOFFObjectWriter.cpp   |    2 +-
 lib/Target/ARM/Thumb1FrameLowering.cpp        |    6 +-
 lib/Target/ARM/Thumb1InstrInfo.cpp            |    2 +-
 lib/Target/ARM/Thumb2InstrInfo.cpp            |   20 +-
 lib/Target/ARM/Thumb2SizeReduction.cpp        |    2 +-
 lib/Target/AVR/AVR.h                          |    2 +-
 lib/Target/AVR/AVRAsmPrinter.cpp              |    2 +-
 lib/Target/AVR/AVRRegisterInfo.cpp            |    2 -
 lib/Target/AVR/AVRSubtarget.cpp               |    2 +-
 lib/Target/AVR/AVRSubtarget.h                 |    3 +-
 lib/Target/AVR/AVRTargetMachine.cpp           |    4 +-
 lib/Target/AVR/AVRTargetObjectFile.cpp        |    2 +-
 lib/Target/AVR/AsmParser/AVRAsmParser.cpp     |    6 +-
 .../AVR/Disassembler/AVRDisassembler.cpp      |    4 +-
 .../AVR/MCTargetDesc/AVRELFStreamer.cpp       |    2 +-
 lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp     |    2 +-
 .../AVR/MCTargetDesc/AVRMCTargetDesc.cpp      |    2 +-
 lib/Target/BPF/BPFAsmPrinter.cpp              |    4 +-
 lib/Target/BPF/BPFInstrInfo.cpp               |    2 +-
 lib/Target/BPF/BPFRegisterInfo.cpp            |    6 +-
 lib/Target/BPF/BPFTargetMachine.cpp           |    6 +-
 .../BPF/Disassembler/BPFDisassembler.cpp      |    4 +-
 lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp |    2 +-
 .../BPF/MCTargetDesc/BPFELFObjectWriter.cpp   |    2 +-
 .../BPF/MCTargetDesc/BPFMCTargetDesc.cpp      |    2 +-
 lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h |    2 +-
 .../Hexagon/AsmParser/HexagonAsmParser.cpp    |    6 +-
 lib/Target/Hexagon/BitTracker.cpp             |    2 +-
 .../Disassembler/HexagonDisassembler.cpp      |    6 +-
 lib/Target/Hexagon/HexagonAsmPrinter.cpp      |    4 +-
 lib/Target/Hexagon/HexagonBitSimplify.cpp     |    2 +-
 lib/Target/Hexagon/HexagonBitTracker.cpp      |    2 +-
 lib/Target/Hexagon/HexagonBlockRanges.h       |    2 +-
 lib/Target/Hexagon/HexagonCommonGEP.cpp       |   16 +-
 .../Hexagon/HexagonConstPropagation.cpp       |    2 +-
 lib/Target/Hexagon/HexagonCopyToCombine.cpp   |    2 +-
 lib/Target/Hexagon/HexagonEarlyIfConv.cpp     |    2 +-
 lib/Target/Hexagon/HexagonFixupHwLoops.cpp    |    3 +-
 lib/Target/Hexagon/HexagonFrameLowering.cpp   |    2 +-
 lib/Target/Hexagon/HexagonGenExtract.cpp      |    2 +-
 lib/Target/Hexagon/HexagonGenInsert.cpp       |    4 +-
 lib/Target/Hexagon/HexagonGenMux.cpp          |    7 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp    |    4 +-
 lib/Target/Hexagon/HexagonInstrInfo.cpp       |    6 +-
 .../Hexagon/HexagonLoopIdiomRecognition.cpp   |    4 +-
 .../Hexagon/HexagonMachineScheduler.cpp       |   57 +-
 lib/Target/Hexagon/HexagonPatterns.td         |   89 +-
 lib/Target/Hexagon/HexagonSplitDouble.cpp     |    2 +-
 lib/Target/Hexagon/HexagonTargetMachine.cpp   |   36 +-
 .../Hexagon/HexagonTargetObjectFile.cpp       |    2 +-
 lib/Target/Hexagon/HexagonVLIWPacketizer.cpp  |    2 +-
 .../MCTargetDesc/HexagonAsmBackend.cpp        |    2 +-
 .../MCTargetDesc/HexagonInstPrinter.cpp       |    2 +-
 .../MCTargetDesc/HexagonMCCodeEmitter.cpp     |    2 +-
 .../MCTargetDesc/HexagonMCELFStreamer.cpp     |    2 +-
 .../MCTargetDesc/HexagonMCShuffler.cpp        |    2 +-
 .../MCTargetDesc/HexagonMCTargetDesc.cpp      |    6 +-
 lib/Target/Hexagon/RDFDeadCode.cpp            |    2 +-
 lib/Target/Hexagon/RDFGraph.cpp               |    2 +-
 lib/Target/Hexagon/RDFLiveness.cpp            |    2 +-
 lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp |    2 +-
 lib/Target/Lanai/LanaiTargetObjectFile.cpp    |    2 +-
 .../MCTargetDesc/LanaiELFObjectWriter.cpp     |    2 +-
 .../Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp |    2 +-
 .../Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp  |    2 +-
 lib/Target/MSP430/MSP430AsmPrinter.cpp        |    2 +-
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp   |   10 +-
 .../Mips/Disassembler/MipsDisassembler.cpp    |    4 +-
 .../Mips/MCTargetDesc/MipsAsmBackend.cpp      |    2 +-
 .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp |    2 +-
 .../Mips/MCTargetDesc/MipsELFStreamer.cpp     |    2 +-
 .../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp   |    2 +-
 lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp   |    4 +-
 .../Mips/MCTargetDesc/MipsOptionRecord.cpp    |    4 +-
 .../Mips/MCTargetDesc/MipsTargetStreamer.cpp  |    4 +-
 lib/Target/Mips/Mips.td                       |    3 +
 lib/Target/Mips/Mips16FrameLowering.cpp       |    4 +-
 lib/Target/Mips/MipsAsmPrinter.cpp            |    4 +-
 lib/Target/Mips/MipsCCState.cpp               |   61 +-
 lib/Target/Mips/MipsCCState.h                 |   33 +-
 lib/Target/Mips/MipsCallingConv.td            |   10 +-
 lib/Target/Mips/MipsConstantIslandPass.cpp    |    2 +-
 lib/Target/Mips/MipsDSPInstrInfo.td           |    5 +-
 lib/Target/Mips/MipsFastISel.cpp              |    2 +-
 lib/Target/Mips/MipsISelLowering.cpp          |   79 +-
 lib/Target/Mips/MipsISelLowering.h            |   27 +
 lib/Target/Mips/MipsInstrFPU.td               |   18 +-
 lib/Target/Mips/MipsInstrInfo.td              |    6 +
 lib/Target/Mips/MipsMachineFunction.cpp       |    8 +-
 lib/Target/Mips/MipsOptimizePICCall.cpp       |    2 +-
 lib/Target/Mips/MipsOs16.cpp                  |    2 +-
 lib/Target/Mips/MipsRegisterInfo.cpp          |    4 +-
 lib/Target/Mips/MipsSEFrameLowering.cpp       |    2 +-
 lib/Target/Mips/MipsSEISelDAGToDAG.cpp        |    2 +-
 lib/Target/Mips/MipsSubtarget.cpp             |    8 +-
 lib/Target/Mips/MipsSubtarget.h               |    5 +
 lib/Target/Mips/MipsTargetMachine.cpp         |    2 +-
 lib/Target/Mips/MipsTargetObjectFile.cpp      |    2 +-
 lib/Target/NVPTX/NVPTXAsmPrinter.cpp          |    4 +-
 lib/Target/NVPTX/NVPTXGenericToNVVM.cpp       |    2 +-
 lib/Target/NVPTX/NVPTXISelLowering.cpp        |    2 +-
 lib/Target/NVPTX/NVPTXInstrInfo.cpp           |    2 +-
 lib/Target/NVPTX/NVPTXLowerArgs.cpp           |    2 +-
 lib/Target/NVPTX/NVPTXPeephole.cpp            |    2 +-
 lib/Target/NVPTX/NVPTXTargetMachine.cpp       |    2 +-
 lib/Target/NVPTX/NVVMIntrRange.cpp            |    2 +-
 .../PowerPC/InstPrinter/PPCInstPrinter.cpp    |    2 +-
 .../PowerPC/MCTargetDesc/PPCAsmBackend.cpp    |    6 +-
 .../MCTargetDesc/PPCELFObjectWriter.cpp       |    2 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp |    2 +-
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp  |    6 +-
 .../MCTargetDesc/PPCMachObjectWriter.cpp      |    8 +-
 lib/Target/PowerPC/PPCAsmPrinter.cpp          |   10 +-
 lib/Target/PowerPC/PPCBoolRetToInt.cpp        |   42 +-
 lib/Target/PowerPC/PPCBranchSelector.cpp      |    2 +-
 lib/Target/PowerPC/PPCCTRLoops.cpp            |    2 +-
 lib/Target/PowerPC/PPCEarlyReturn.cpp         |    2 +-
 lib/Target/PowerPC/PPCFastISel.cpp            |    4 +-
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp        |   65 +-
 lib/Target/PowerPC/PPCISelLowering.cpp        |   10 +-
 lib/Target/PowerPC/PPCInstrVSX.td             |   51 +
 lib/Target/PowerPC/PPCMCInstLower.cpp         |    2 +-
 lib/Target/PowerPC/PPCMIPeephole.cpp          |    2 +-
 lib/Target/PowerPC/PPCTLSDynamicCall.cpp      |    2 +-
 lib/Target/PowerPC/PPCTOCRegDeps.cpp          |    2 +-
 lib/Target/PowerPC/PPCTargetMachine.cpp       |    2 +-
 lib/Target/PowerPC/PPCVSXCopy.cpp             |    2 +-
 lib/Target/PowerPC/PPCVSXFMAMutate.cpp        |    2 +-
 lib/Target/PowerPC/PPCVSXSwapRemoval.cpp      |    2 +-
 .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp    |    2 +-
 .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp |    2 +-
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.h    |    2 +-
 lib/Target/RISCV/RISCVTargetMachine.cpp       |    2 +-
 lib/Target/Sparc/AsmParser/SparcAsmParser.cpp |    4 +-
 .../Sparc/Disassembler/SparcDisassembler.cpp  |    4 +-
 .../Sparc/MCTargetDesc/SparcAsmBackend.cpp    |    2 +-
 .../Sparc/MCTargetDesc/SparcMCAsmInfo.cpp     |    2 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp |    1 -
 lib/Target/Sparc/SparcAsmPrinter.cpp          |    2 +-
 lib/Target/Sparc/SparcMCInstLower.cpp         |    2 +-
 lib/Target/Sparc/SparcTargetMachine.cpp       |    4 +-
 lib/Target/Sparc/SparcTargetObjectFile.cpp    |    2 +-
 .../SystemZ/AsmParser/SystemZAsmParser.cpp    |    2 +-
 .../MCTargetDesc/SystemZMCAsmBackend.cpp      |    2 +-
 .../MCTargetDesc/SystemZMCObjectWriter.cpp    |    2 +-
 lib/Target/SystemZ/SystemZHazardRecognizer.h  |    2 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp    |   25 +-
 lib/Target/SystemZ/SystemZInstrInfo.cpp       |    2 +-
 lib/Target/SystemZ/SystemZLDCleanup.cpp       |    2 +-
 lib/Target/SystemZ/SystemZRegisterInfo.cpp    |    2 +-
 lib/Target/SystemZ/SystemZShortenInst.cpp     |    2 +-
 lib/Target/SystemZ/SystemZSubtarget.h         |    2 +-
 lib/Target/SystemZ/SystemZTDC.cpp             |    6 +-
 lib/Target/SystemZ/SystemZTargetMachine.cpp   |    4 +-
 lib/Target/Target.cpp                         |    4 +-
 lib/Target/TargetLoweringObjectFile.cpp       |   16 +-
 lib/Target/TargetMachineC.cpp                 |    4 +-
 .../Disassembler/WebAssemblyDisassembler.cpp  |    2 +-
 .../InstPrinter/WebAssemblyInstPrinter.h      |    2 +-
 .../MCTargetDesc/WebAssemblyAsmBackend.cpp    |    2 +-
 .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp |    2 +-
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h    |    2 +-
 .../MCTargetDesc/WebAssemblyTargetStreamer.h  |    2 +-
 .../WebAssemblyWasmObjectWriter.cpp           |   11 +-
 lib/Target/WebAssembly/WebAssemblyCFGSort.cpp |    2 +-
 .../WebAssembly/WebAssemblyCFGStackify.cpp    |    2 +-
 .../WebAssemblyCallIndirectFixup.cpp          |    2 +-
 .../WebAssembly/WebAssemblyFastISel.cpp       |    2 +-
 .../WebAssemblyFixIrreducibleControlFlow.cpp  |    2 +-
 .../WebAssembly/WebAssemblyISelDAGToDAG.cpp   |    2 +-
 .../WebAssembly/WebAssemblyLowerBrUnless.cpp  |    2 +-
 .../WebAssemblyPrepareForLiveIntervals.cpp    |    2 +-
 .../WebAssembly/WebAssemblyRegNumbering.cpp   |    2 +-
 .../WebAssembly/WebAssemblyRegStackify.cpp    |    2 +-
 .../WebAssemblyReplacePhysRegs.cpp            |    2 +-
 .../WebAssemblySetP2AlignOperands.cpp         |    2 +-
 .../WebAssembly/WebAssemblyStoreResults.cpp   |    2 +-
 .../WebAssembly/WebAssemblyTargetMachine.cpp  |    4 +-
 .../X86/AsmParser/X86AsmInstrumentation.cpp   |    4 +-
 lib/Target/X86/AsmParser/X86Operand.h         |    2 +-
 lib/Target/X86/CMakeLists.txt                 |    1 -
 .../X86/Disassembler/X86Disassembler.cpp      |    2 +-
 .../Disassembler/X86DisassemblerDecoder.cpp   |    8 +-
 .../X86/InstPrinter/X86ATTInstPrinter.cpp     |    2 +-
 .../X86/InstPrinter/X86InstComments.cpp       |    2 +-
 .../X86/InstPrinter/X86IntelInstPrinter.cpp   |    2 +-
 lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp |    4 +-
 .../X86/MCTargetDesc/X86ELFObjectWriter.cpp   |    2 +-
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp  |    2 +-
 .../X86/MCTargetDesc/X86MachObjectWriter.cpp  |    4 +-
 .../MCTargetDesc/X86WinCOFFObjectWriter.cpp   |    2 +-
 lib/Target/X86/X86AsmPrinter.cpp              |    2 +-
 lib/Target/X86/X86FastISel.cpp                |    6 +
 lib/Target/X86/X86FrameLowering.cpp           |    2 +-
 lib/Target/X86/X86ISelLowering.cpp            |  191 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td       |   41 +-
 lib/Target/X86/X86InstrInfo.cpp               | 3400 +++++++++++++++-
 lib/Target/X86/X86InstrSSE.td                 |   10 +-
 lib/Target/X86/X86MCInstLower.cpp             |   16 +-
 lib/Target/X86/X86OptimizeLEAs.cpp            |    2 +-
 lib/Target/X86/X86SchedHaswell.td             |   33 +
 lib/Target/X86/X86SchedSandyBridge.td         |   25 +
 lib/Target/X86/X86Schedule.td                 |    4 +
 lib/Target/X86/X86ScheduleBtVer2.td           |   32 +
 lib/Target/X86/X86ScheduleSLM.td              |   27 +
 lib/Target/X86/X86SelectionDAGInfo.cpp        |    4 +-
 lib/Target/X86/X86Subtarget.cpp               |    2 +-
 lib/Target/X86/X86TargetMachine.cpp           |    2 +-
 lib/Target/X86/X86TargetObjectFile.cpp        |    4 +-
 lib/Target/X86/X86TargetTransformInfo.cpp     |   13 +
 lib/Target/X86/X86TargetTransformInfo.h       |    4 +
 lib/Target/X86/X86WinEHState.cpp              |    2 +-
 .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp  |    4 +-
 lib/Target/XCore/XCoreAsmPrinter.cpp          |    2 +-
 lib/Target/XCore/XCoreTargetMachine.cpp       |    2 +-
 lib/Target/XCore/XCoreTargetMachine.h         |    2 +-
 lib/Target/XCore/XCoreTargetObjectFile.cpp    |    2 +-
 lib/ToolDrivers/llvm-lib/LLVMBuild.txt        |    2 +-
 lib/ToolDrivers/llvm-lib/LibDriver.cpp        |   12 +-
 lib/Transforms/Coroutines/CoroSplit.cpp       |    2 +-
 lib/Transforms/IPO/ElimAvailExtern.cpp        |    2 +-
 lib/Transforms/IPO/ExtractGV.cpp              |   14 +-
 lib/Transforms/IPO/FunctionAttrs.cpp          |    3 +-
 lib/Transforms/IPO/GlobalSplit.cpp            |    2 +-
 lib/Transforms/IPO/IPConstantPropagation.cpp  |    2 +-
 lib/Transforms/IPO/IPO.cpp                    |    4 +-
 lib/Transforms/IPO/InferFunctionAttrs.cpp     |    2 +-
 lib/Transforms/IPO/Inliner.cpp                |    8 +-
 lib/Transforms/IPO/LoopExtractor.cpp          |    2 +-
 lib/Transforms/IPO/LowerTypeTests.cpp         |   56 +-
 lib/Transforms/IPO/PruneEH.cpp                |    4 +-
 lib/Transforms/IPO/SampleProfile.cpp          |    7 +
 lib/Transforms/IPO/StripSymbols.cpp           |    2 +-
 lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp   |   12 +-
 lib/Transforms/IPO/WholeProgramDevirt.cpp     |    2 +-
 .../InstCombine/InstCombineAddSub.cpp         |   23 +-
 .../InstCombine/InstCombineAndOrXor.cpp       |   27 +-
 .../InstCombine/InstCombineCalls.cpp          |   16 +-
 .../InstCombine/InstCombineCasts.cpp          |    8 +-
 .../InstCombine/InstCombineCompares.cpp       |   70 +-
 .../InstCombine/InstCombineInternal.h         |    2 +-
 .../InstCombine/InstCombineMulDivRem.cpp      |   21 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp |    4 +-
 .../InstCombine/InstCombineSelect.cpp         |    7 +-
 .../InstCombine/InstCombineShifts.cpp         |   30 +-
 .../InstCombineSimplifyDemanded.cpp           |    8 +-
 .../InstCombine/InstCombineVectorOps.cpp      |    9 +-
 .../InstCombine/InstructionCombining.cpp      |   45 +-
 .../Instrumentation/BoundsChecking.cpp        |    2 +-
 .../Instrumentation/DataFlowSanitizer.cpp     |    4 +-
 .../Instrumentation/EfficiencySanitizer.cpp   |    2 +-
 .../Instrumentation/InstrProfiling.cpp        |    2 +-
 .../Instrumentation/ThreadSanitizer.cpp       |    2 +-
 lib/Transforms/ObjCARC/BlotMapVector.h        |    2 +-
 lib/Transforms/ObjCARC/DependencyAnalysis.cpp |    2 +-
 lib/Transforms/ObjCARC/ObjCARCContract.cpp    |    2 +-
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp        |    2 +-
 lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp |    2 +-
 .../ObjCARC/ProvenanceAnalysisEvaluator.cpp   |    4 +-
 lib/Transforms/ObjCARC/PtrState.h             |    2 +-
 .../Scalar/AlignmentFromAssumptions.cpp       |    4 +-
 lib/Transforms/Scalar/ConstantProp.cpp        |    6 +-
 lib/Transforms/Scalar/DCE.cpp                 |    2 +-
 lib/Transforms/Scalar/FlattenCFGPass.cpp      |    2 +-
 lib/Transforms/Scalar/GVNHoist.cpp            |    2 +-
 lib/Transforms/Scalar/GVNSink.cpp             |    6 +-
 lib/Transforms/Scalar/GuardWidening.cpp       |    2 +-
 lib/Transforms/Scalar/IndVarSimplify.cpp      |    7 +-
 .../Scalar/InductiveRangeCheckElimination.cpp |   37 +-
 lib/Transforms/Scalar/InferAddressSpaces.cpp  |    9 +-
 lib/Transforms/Scalar/JumpThreading.cpp       |   10 +-
 lib/Transforms/Scalar/LoadCombine.cpp         |    2 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp  |   77 +-
 lib/Transforms/Scalar/LoopPredication.cpp     |    2 +-
 lib/Transforms/Scalar/LoopRerollPass.cpp      |    4 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp  |  167 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp        |   16 +-
 .../Scalar/LowerExpectIntrinsic.cpp           |    4 +-
 lib/Transforms/Scalar/LowerGuardIntrinsic.cpp |    2 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp     |    8 +-
 lib/Transforms/Scalar/NewGVN.cpp              |   80 +-
 lib/Transforms/Scalar/Reg2Mem.cpp             |    2 +-
 .../Scalar/RewriteStatepointsForGC.cpp        |   20 +-
 lib/Transforms/Scalar/SCCP.cpp                |    4 +-
 lib/Transforms/Scalar/SROA.cpp                |   19 +-
 lib/Transforms/Scalar/Scalar.cpp              |    6 +-
 lib/Transforms/Scalar/Scalarizer.cpp          |    2 +-
 .../Scalar/SeparateConstOffsetFromGEP.cpp     |   10 +-
 lib/Transforms/Scalar/SimpleLoopUnswitch.cpp  |    4 +-
 lib/Transforms/Scalar/Sink.cpp                |    2 +-
 lib/Transforms/Scalar/StructurizeCFG.cpp      |    2 +-
 .../Scalar/TailRecursionElimination.cpp       |    4 +-
 lib/Transforms/Utils/CMakeLists.txt           |    1 +
 lib/Transforms/Utils/CloneFunction.cpp        |    2 +-
 lib/Transforms/Utils/CloneModule.cpp          |    4 +-
 lib/Transforms/Utils/DemoteRegToStack.cpp     |    2 +-
 lib/Transforms/Utils/Evaluator.cpp            |    4 +-
 lib/Transforms/Utils/FlattenCFG.cpp           |    2 +-
 lib/Transforms/Utils/FunctionComparator.cpp   |    2 +-
 lib/Transforms/Utils/FunctionImportUtils.cpp  |    2 +-
 lib/Transforms/Utils/GlobalStatus.cpp         |    2 +-
 lib/Transforms/Utils/InlineFunction.cpp       |    6 +-
 lib/Transforms/Utils/InstructionNamer.cpp     |    2 +-
 lib/Transforms/Utils/Local.cpp                |    2 +-
 lib/Transforms/Utils/LoopSimplify.cpp         |    4 +-
 lib/Transforms/Utils/LoopUnroll.cpp           |    2 +-
 lib/Transforms/Utils/LoopUnrollRuntime.cpp    |    2 +-
 lib/Transforms/Utils/LoopUtils.cpp            |    3 +-
 lib/Transforms/Utils/LowerMemIntrinsics.cpp   |    4 +-
 lib/Transforms/Utils/LowerSwitch.cpp          |    2 +-
 lib/Transforms/Utils/MetaRenamer.cpp          |    2 +-
 lib/Transforms/Utils/OrderedInstructions.cpp  |   33 +
 lib/Transforms/Utils/SSAUpdater.cpp           |    4 +-
 lib/Transforms/Utils/SanitizerStats.cpp       |    2 +-
 lib/Transforms/Utils/SimplifyCFG.cpp          |    5 +-
 lib/Transforms/Utils/SimplifyInstructions.cpp |    2 +-
 lib/Transforms/Utils/SimplifyLibCalls.cpp     |    2 +-
 lib/Transforms/Utils/StripGCRelocates.cpp     |    2 +-
 .../Utils/StripNonLineTableDebugInfo.cpp      |    2 +-
 lib/Transforms/Utils/SymbolRewriter.cpp       |    2 +-
 lib/Transforms/Utils/Utils.cpp                |    2 +-
 lib/Transforms/Vectorize/BBVectorize.cpp      |    2 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp    |    9 +-
 lib/Transforms/Vectorize/Vectorize.cpp        |    2 +-
 lib/XRay/InstrumentationMap.cpp               |    2 +-
 .../BranchProbabilityInfo/libfunc_call.ll     |  264 ++
 .../gep-constanfolding-error.ll               |    2 +-
 .../lvi-after-jumpthreading.ll                |   70 +-
 test/Bindings/OCaml/core.ml                   |   11 +
 test/Bitcode/ptest-old.ll                     |    2 +-
 test/BugPoint/unsymbolized.ll                 |   21 +
 test/CMakeLists.txt                           |    2 +-
 .../arm64-irtranslator-stackprotect.ll        |    2 +-
 .../AArch64/GlobalISel/arm64-irtranslator.ll  |   23 +-
 .../GlobalISel/arm64-regbankselect.mir        |  158 +-
 .../AArch64/GlobalISel/call-translator-ios.ll |    4 +-
 .../AArch64/GlobalISel/call-translator.ll     |   10 +-
 .../CodeGen/AArch64/GlobalISel/debug-insts.ll |    4 +-
 .../GlobalISel/localizer-in-O0-pipeline.mir   |   16 +-
 test/CodeGen/AArch64/GlobalISel/localizer.mir |   80 +-
 .../GlobalISel/regbankselect-dbg-value.mir    |    2 +-
 .../GlobalISel/regbankselect-default.mir      |  164 +-
 .../AArch64/GlobalISel/select-binop.mir       |  200 +-
 .../AArch64/GlobalISel/select-bitcast.mir     |   32 +-
 .../AArch64/GlobalISel/select-fp-casts.mir    |   72 +-
 .../AArch64/GlobalISel/select-int-ext.mir     |   46 +-
 .../GlobalISel/select-int-ptr-casts.mir       |   24 +-
 .../AArch64/GlobalISel/select-load.mir        |  100 +-
 .../AArch64/GlobalISel/select-muladd.mir      |   14 +-
 .../AArch64/GlobalISel/select-store.mir       |   84 +-
 .../AArch64/GlobalISel/select-trunc.mir       |   12 +-
 .../CodeGen/AArch64/GlobalISel/select-xor.mir |   30 +-
 test/CodeGen/AArch64/GlobalISel/select.mir    |   52 +-
 .../GlobalISel/varargs-ios-translator.ll      |    2 +-
 .../arm64-fast-isel-conversion-fallback.ll    |  131 +
 test/CodeGen/AArch64/spill-undef.mir          |   67 +
 .../AMDGPU/GlobalISel/legalize-icmp.mir       |   24 +
 .../AMDGPU/GlobalISel/legalize-select.mir     |   28 +
 .../AMDGPU/GlobalISel/regbankselect.mir       |   14 +-
 test/CodeGen/AMDGPU/add.v2i16.ll              |    8 +-
 test/CodeGen/AMDGPU/ashr.v2i16.ll             |    2 +-
 test/CodeGen/AMDGPU/branch-relax-spill.ll     |  420 +-
 .../AMDGPU/clamp-omod-special-case.mir        |   20 +
 test/CodeGen/AMDGPU/exceed-max-sgprs.ll       |  142 +-
 test/CodeGen/AMDGPU/fabs.f16.ll               |    8 +-
 test/CodeGen/AMDGPU/fadd.f16.ll               |    6 +-
 test/CodeGen/AMDGPU/fcanonicalize.f16.ll      |    6 +-
 test/CodeGen/AMDGPU/flat-scratch-reg.ll       |   14 +-
 test/CodeGen/AMDGPU/fmul.f16.ll               |    6 +-
 test/CodeGen/AMDGPU/fneg-fabs.f16.ll          |    8 +-
 test/CodeGen/AMDGPU/fneg.f16.ll               |    2 +-
 test/CodeGen/AMDGPU/fptosi.f16.ll             |    2 +-
 test/CodeGen/AMDGPU/fptoui.f16.ll             |    2 +-
 test/CodeGen/AMDGPU/fsub.f16.ll               |    4 +-
 test/CodeGen/AMDGPU/hsa-note-no-func.ll       |   13 +
 .../AMDGPU/illegal-sgpr-to-vgpr-copy.ll       |   20 +-
 test/CodeGen/AMDGPU/immv216.ll                |   38 +-
 test/CodeGen/AMDGPU/indirect-addressing-si.ll |    2 +-
 test/CodeGen/AMDGPU/inline-asm.ll             |   12 +-
 .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll |    8 +-
 test/CodeGen/AMDGPU/limit-coalesce.mir        |   14 +-
 test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll     |   23 +
 .../AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll     |   22 +-
 .../AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll        |   57 +-
 .../AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll      |   22 +-
 test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll       |    2 +-
 test/CodeGen/AMDGPU/llvm.maxnum.f16.ll        |    6 +-
 test/CodeGen/AMDGPU/llvm.minnum.f16.ll        |    6 +-
 .../AMDGPU/partial-sgpr-to-vgpr-spills.ll     |   10 +-
 .../AMDGPU/promote-alloca-array-aggregate.ll  |  131 +
 ...dependent-subregs-invalid-mac-operands.mir |   69 +
 test/CodeGen/AMDGPU/scratch-simple.ll         |    6 +-
 test/CodeGen/AMDGPU/sdwa-peephole.ll          |   24 +-
 test/CodeGen/AMDGPU/shl.v2i16.ll              |    2 +-
 test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll    |   88 +-
 test/CodeGen/AMDGPU/skip-if-dead.ll           |   12 +-
 test/CodeGen/AMDGPU/sminmax.v2i16.ll          |    8 +-
 test/CodeGen/AMDGPU/spill-scavenge-offset.ll  |   14 +-
 test/CodeGen/AMDGPU/sub.v2i16.ll              |   12 +-
 .../AMDGPU/undefined-subreg-liverange.ll      |    6 +-
 test/CodeGen/AMDGPU/v_mac_f16.ll              |   31 +-
 .../arm-instruction-select-combos.mir         |  149 +
 .../ARM/GlobalISel/arm-instruction-select.mir |  115 +-
 .../ARM/GlobalISel/arm-irtranslator.ll        |   90 +-
 test/CodeGen/ARM/GlobalISel/arm-isel.ll       |   84 +-
 .../ARM/GlobalISel/arm-legalize-fp.mir        |   36 +-
 test/CodeGen/ARM/GlobalISel/arm-legalizer.mir |  242 +-
 .../ARM/GlobalISel/arm-regbankselect.mir      |  281 +-
 test/CodeGen/ARM/clang-section.ll             |  140 +
 test/CodeGen/ARM/cortex-a57-misched-vfma.ll   |   91 +-
 test/CodeGen/ARM/invalidated-save-point.ll    |    4 +-
 test/CodeGen/Generic/llc-start-stop.ll        |    4 +-
 test/CodeGen/Hexagon/common-gep-inbounds.ll   |   20 +
 test/CodeGen/Hexagon/mux-undef.ll             |   27 +
 .../generic-virtual-registers-error.mir       |    1 -
 ...c-virtual-registers-with-regbank-error.mir |    1 -
 .../MIR/AArch64/register-operand-bank.mir     |    4 +-
 .../MIR/AArch64/stack-object-local-offset.mir |    4 +-
 test/CodeGen/MIR/Generic/frame-info.mir       |    5 +
 .../function-missing-machine-function.mir     |   13 -
 test/CodeGen/MIR/X86/callee-saved-info.mir    |    4 +-
 test/CodeGen/MIR/X86/empty0.mir               |    6 +
 test/CodeGen/MIR/X86/empty1.mir               |    8 +
 test/CodeGen/MIR/X86/empty2.mir               |    8 +
 test/CodeGen/MIR/X86/fixed-stack-objects.mir  |    2 +-
 test/CodeGen/MIR/X86/generic-instr-type.mir   |   10 +-
 test/CodeGen/MIR/X86/inline-asm.mir           |   12 +
 .../MIR/X86/register-operand-class.mir        |   12 +-
 test/CodeGen/MIR/X86/roundtrip.mir            |   20 +
 .../X86/simple-register-allocation-hints.mir  |    2 +-
 .../X86/spill-slot-fixed-stack-objects.mir    |    2 +-
 .../MIR/X86/stack-object-debug-info.mir       |    5 +-
 test/CodeGen/MIR/X86/stack-objects.mir        |    9 +-
 .../MIR/X86/variable-sized-stack-objects.mir  |    8 +-
 test/CodeGen/MIR/X86/virtual-registers.mir    |   12 +-
 test/CodeGen/Mips/biggot.ll                   |    3 +
 test/CodeGen/Mips/cconv/vector.ll             | 1657 ++++++++
 test/CodeGen/Mips/ctlz-v.ll                   |   12 +-
 test/CodeGen/Mips/cttz-v.ll                   |   19 +-
 test/CodeGen/Mips/dsp-r1.ll                   |   12 +-
 test/CodeGen/Mips/fmadd1.ll                   |  435 +-
 test/CodeGen/Mips/llvm-ir/mul.ll              |    2 +-
 test/CodeGen/Mips/llvm-ir/sdiv.ll             |   12 +-
 test/CodeGen/Mips/llvm-ir/srem.ll             |   11 +-
 test/CodeGen/Mips/llvm-ir/udiv.ll             |   11 +-
 test/CodeGen/Mips/llvm-ir/urem.ll             |    6 +-
 test/CodeGen/Mips/micromips-gp-rc.ll          |    2 +-
 test/CodeGen/Mips/mips64fpldst.ll             |   12 +-
 test/CodeGen/Mips/pbqp-reserved-physreg.ll    |   35 +
 test/CodeGen/Mips/return-vector.ll            |   33 +-
 test/CodeGen/Mips/tailcall/tailcall.ll        |    4 +-
 test/CodeGen/PowerPC/BoolRetToIntTest-2.ll    |   19 +
 test/CodeGen/PowerPC/BoolRetToIntTest.ll      |   28 +-
 test/CodeGen/PowerPC/crbits.ll                |   46 +-
 test/CodeGen/PowerPC/logic-ops-on-compares.ll |   73 +-
 .../memCmpUsedInZeroEqualityComparison.ll     |  281 +-
 test/CodeGen/PowerPC/ppc-crbits-onoff.ll      |   18 +-
 test/CodeGen/PowerPC/setcc-logic.ll           |   16 +-
 test/CodeGen/PowerPC/testComparesinesc.ll     |  121 +
 test/CodeGen/PowerPC/testComparesinesi.ll     |  121 +
 test/CodeGen/PowerPC/testComparesinesll.ll    |  125 +
 test/CodeGen/PowerPC/testComparesiness.ll     |  121 +
 test/CodeGen/PowerPC/testComparesineuc.ll     |  136 +
 test/CodeGen/PowerPC/testComparesineui.ll     |  121 +
 test/CodeGen/PowerPC/testComparesineull.ll    |  125 +
 test/CodeGen/PowerPC/testComparesineus.ll     |  137 +
 test/CodeGen/PowerPC/testComparesllnesll.ll   |  125 +
 test/CodeGen/PowerPC/testComparesllneull.ll   |  125 +
 test/CodeGen/PowerPC/vec_int_ext.ll           |   90 +
 test/CodeGen/X86/2006-05-11-InstrSched.ll     |    2 +-
 .../X86/GlobalISel/irtranslator-call.ll       |   30 -
 .../GlobalISel/irtranslator-callingconv.ll    |  122 +-
 .../irtranslator-callingconv_64bit.ll         |   25 -
 .../X86/GlobalISel/legalize-mul-scalar.mir    |   18 +-
 .../X86/GlobalISel/legalize-mul-v128.mir      |   18 +-
 .../X86/GlobalISel/legalize-mul-v256.mir      |   18 +-
 .../X86/GlobalISel/legalize-mul-v512.mir      |   18 +-
 .../X86/GlobalISel/regbankselect-AVX2.mir     |   20 +-
 .../X86/GlobalISel/regbankselect-AVX512.mir   |   20 +-
 .../X86/GlobalISel/regbankselect-X32.mir      |   10 +-
 .../X86/GlobalISel/regbankselect-X86_64.mir   |  170 +-
 .../X86/GlobalISel/select-add-v128.mir        |   72 +-
 .../X86/GlobalISel/select-add-v256.mir        |   72 +-
 .../X86/GlobalISel/select-add-v512.mir        |   24 +-
 .../CodeGen/X86/GlobalISel/select-add-x32.mir |   20 +-
 test/CodeGen/X86/GlobalISel/select-add.mir    |   72 +-
 test/CodeGen/X86/GlobalISel/select-cmp.mir    |  130 +-
 .../X86/GlobalISel/select-constant.mir        |   12 +-
 .../X86/GlobalISel/select-ext-x86-64.mir      |   16 +-
 test/CodeGen/X86/GlobalISel/select-ext.mir    |   22 +-
 test/CodeGen/X86/GlobalISel/select-gep.mir    |    6 +-
 test/CodeGen/X86/GlobalISel/select-inc.mir    |    8 +-
 .../X86/GlobalISel/select-leaf-constant.mir   |    8 +-
 .../GlobalISel/select-memop-scalar-x32.mir    |   56 +-
 .../X86/GlobalISel/select-memop-scalar.mir    |   76 +-
 .../X86/GlobalISel/select-memop-v128.mir      |   24 +-
 .../X86/GlobalISel/select-memop-v256.mir      |   32 +-
 .../X86/GlobalISel/select-memop-v512.mir      |   16 +-
 .../X86/GlobalISel/select-mul-scalar.mir      |   18 +-
 .../CodeGen/X86/GlobalISel/select-mul-vec.mir |   90 +-
 .../X86/GlobalISel/select-sub-v128.mir        |   72 +-
 .../X86/GlobalISel/select-sub-v256.mir        |   72 +-
 .../X86/GlobalISel/select-sub-v512.mir        |   24 +-
 test/CodeGen/X86/GlobalISel/select-sub.mir    |   60 +-
 test/CodeGen/X86/GlobalISel/select-trunc.mir  |   24 +-
 test/CodeGen/X86/O0-pipeline.ll               |    2 +-
 test/CodeGen/X86/atom-fixup-lea3.ll           |   11 +-
 test/CodeGen/X86/avx-schedule.ll              |   32 +-
 test/CodeGen/X86/avx-splat.ll                 |   36 +-
 test/CodeGen/X86/avx512-cvt.ll                |    4 -
 test/CodeGen/X86/build-vector-128.ll          |   96 +-
 test/CodeGen/X86/buildvec-insertvec.ll        |   76 +-
 .../X86/clear_upper_vector_element_bits.ll    |  201 +-
 test/CodeGen/X86/fast-isel-nontemporal.ll     |  108 +-
 test/CodeGen/X86/full-lsr.ll                  |   10 +-
 test/CodeGen/X86/haddsub-2.ll                 |  362 +-
 test/CodeGen/X86/haddsub-undef.ll             |    5 +-
 test/CodeGen/X86/hoist-spill.ll               |    2 -
 test/CodeGen/X86/loop-strength-reduce4.ll     |   15 +-
 test/CodeGen/X86/madd.ll                      |   78 +-
 test/CodeGen/X86/masked-iv-safe.ll            |   16 +-
 test/CodeGen/X86/memcmp.ll                    |  330 +-
 .../X86/merge-consecutive-loads-128.ll        |   36 +-
 test/CodeGen/X86/mul-constant-i16.ll          |  139 +-
 test/CodeGen/X86/mul-constant-i32.ll          | 1578 +++++++-
 test/CodeGen/X86/mul-constant-i64.ll          | 1605 +++++++-
 test/CodeGen/X86/mul-constant-result.ll       | 1291 ++++++
 test/CodeGen/X86/nontemporal-loads.ll         |  779 ++--
 test/CodeGen/X86/pr32659.ll                   |   83 +
 test/CodeGen/X86/select.ll                    |   14 +-
 test/CodeGen/X86/selectiondag-dominator.ll    |   30 +
 test/CodeGen/X86/sse-intrinsics-fast-isel.ll  |   74 +-
 test/CodeGen/X86/sse1.ll                      |   80 +-
 test/CodeGen/X86/sse2-intrinsics-fast-isel.ll |  167 +-
 test/CodeGen/X86/sse3-avx-addsub-2.ll         |   14 +-
 .../CodeGen/X86/sse42-intrinsics-fast-isel.ll |    6 +-
 test/CodeGen/X86/stack-folding-fp-avx1.ll     |   21 +-
 test/CodeGen/X86/stack-folding-int-sse42.ll   |   17 +-
 test/CodeGen/X86/trunc-to-bool.ll             |   70 +-
 test/CodeGen/X86/vec_fp_to_int.ll             |   18 +-
 test/CodeGen/X86/vec_int_to_fp.ll             |  306 +-
 test/CodeGen/X86/vec_set.ll                   |   24 +-
 test/CodeGen/X86/vector-compare-results.ll    |  538 ++-
 test/CodeGen/X86/vector-rem.ll                |   34 +-
 test/CodeGen/X86/vector-sext.ll               |  876 ++--
 test/CodeGen/X86/vector-shuffle-v48.ll        |   49 +
 .../X86/vector-shuffle-variable-128.ll        |  314 +-
 test/CodeGen/X86/vector-sqrt.ll               |   18 +-
 test/CodeGen/X86/vector-unsigned-cmp.ll       |  134 +-
 ...rs-cleared-in-machine-functions-liveins.ll |    4 +-
 test/CodeGen/X86/vshift-1.ll                  |    9 +-
 test/CodeGen/X86/vshift-2.ll                  |    9 +-
 test/CodeGen/X86/x86-interleaved-access.ll    |   93 +
 .../Inputs/dwarfdump-str-offsets-dwp.s        |  277 ++
 .../Inputs/dwarfdump-str-offsets-invalid-1.s  |   34 +
 .../dwarfdump-str-offsets-invalid-1.x86_64.o  |  Bin 0 -> 824 bytes
 .../Inputs/dwarfdump-str-offsets-invalid-2.s  |   36 +
 .../dwarfdump-str-offsets-invalid-2.x86_64.o  |  Bin 0 -> 832 bytes
 .../Inputs/dwarfdump-str-offsets-invalid-3.s  |   88 +
 .../dwarfdump-str-offsets-invalid-3.x86_64.o  |  Bin 0 -> 2296 bytes
 .../Inputs/dwarfdump-str-offsets-invalid-4.s  |   50 +
 .../dwarfdump-str-offsets-invalid-4.x86_64.o  |  Bin 0 -> 1264 bytes
 .../Inputs/dwarfdump-str-offsets-invalid-5.s  |   10 +
 .../dwarfdump-str-offsets-invalid-5.x86_64.o  |  Bin 0 -> 464 bytes
 test/DebugInfo/Inputs/dwarfdump-str-offsets.s |  500 +++
 .../Inputs/dwarfdump-str-offsets.x86_64.o     |  Bin 0 -> 4000 bytes
 test/DebugInfo/PDB/DIA/pdbdump-flags.test     |    8 +-
 .../PDB/DIA/pdbdump-linenumbers.test          |    4 +-
 .../PDB/DIA/pdbdump-symbol-format.test        |   10 +-
 .../PDB/Inputs/debug-subsections.yaml         |   91 +
 .../PDB/Inputs/simple-line-info.yaml          |   44 -
 .../PDB/Native/pdb-native-compilands.test     |    4 +-
 .../PDB/Native/pdb-native-summary.test        |    2 +-
 .../PDB/pdb-longname-truncation.test          |    2 +-
 test/DebugInfo/PDB/pdb-minimal-construct.test |   22 +-
 test/DebugInfo/PDB/pdb-yaml-symbols.test      |    2 +-
 test/DebugInfo/PDB/pdb-yaml-types.test        |    6 +-
 .../PDB/pdbdump-debug-subsections.test        |  210 +
 test/DebugInfo/PDB/pdbdump-headers.test       |   70 +-
 .../PDB/pdbdump-merge-ids-and-types.test      |   18 +-
 test/DebugInfo/PDB/pdbdump-mergeids.test      |   12 +-
 test/DebugInfo/PDB/pdbdump-mergetypes.test    |   10 +-
 test/DebugInfo/PDB/pdbdump-raw-blocks.test    |   10 +-
 test/DebugInfo/PDB/pdbdump-raw-stream.test    |    4 +-
 test/DebugInfo/PDB/pdbdump-readwrite.test     |   10 +-
 test/DebugInfo/PDB/pdbdump-source-names.test  |    8 +-
 test/DebugInfo/PDB/pdbdump-write.test         |   14 +-
 .../PDB/pdbdump-yaml-lineinfo-write.test      |   71 -
 test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test |   60 -
 test/DebugInfo/PDB/pdbdump-yaml-types.test    |    2 +-
 test/DebugInfo/PDB/pdbdump-yaml.test          |    4 +-
 .../dwarfdump-str-offsets-invalid.test        |   24 +
 test/DebugInfo/dwarfdump-str-offsets.test     |   76 +
 test/FileCheck/check-dag.txt                  |    9 +
 test/Instrumentation/MemorySanitizer/csr.ll   |    2 +-
 .../MemorySanitizer/msan_x86intrinsics.ll     |    2 +-
 .../MemorySanitizer/vector_arith.ll           |    2 +-
 .../MemorySanitizer/vector_cmp.ll             |    2 +-
 .../MemorySanitizer/vector_cvt.ll             |    2 +-
 .../MemorySanitizer/vector_pack.ll            |    2 +-
 .../MemorySanitizer/vector_shift.ll           |    2 +-
 test/LTO/ARM/Inputs/thumb.ll                  |   15 +
 test/LTO/ARM/link-arm-and-thumb.ll            |   32 +
 test/LTO/Resolution/X86/linker-redef.ll       |   16 +
 test/Linker/Inputs/thumb.ll                   |   16 +
 test/Linker/link-arm-and-thumb.ll             |   23 +
 test/MC/AMDGPU/sopp-err.s                     |    8 +-
 test/MC/AMDGPU/sym_option.s                   |    4 +-
 test/MC/ARM/arm-thumb-tail-call.ll            |   25 +
 test/MC/ARM/big-endian-thumb2-fixup.s         |    6 +
 .../ARM/t2-modified-immediate-fixup-error1.s  |   13 +
 .../ARM/t2-modified-immediate-fixup-error2.s  |   12 +
 test/MC/ARM/t2-modified-immediate-fixup.s     |   45 +
 test/MC/ARM/thumb2-diagnostics.s              |    2 -
 test/MC/AsmParser/empty-comment.s             |    4 +
 .../Disassembler/Mips/micromips-dsp/valid.txt |    2 +-
 test/MC/ELF/ARM/clang-section.s               |  399 ++
 test/MC/MachO/alias.s                         |   12 +
 test/MC/MachO/variable-exprs.s                |    8 +-
 test/MC/Mips/dsp/invalid.s                    |    4 +-
 test/MC/Mips/micromips-dsp/invalid.s          |    2 +
 test/MC/Mips/micromips-dsp/valid.s            |    2 +-
 test/MC/WebAssembly/reloc-code.ll             |   20 +-
 test/Object/AMDGPU/elf-definitions.yaml       |   21 +-
 test/Object/objc-imageinfo-coff.ll            |   15 +
 test/Object/objc-imageinfo-elf.ll             |   15 +
 test/Object/objc-imageinfo-macho.ll           |   15 +
 test/Transforms/CodeGenPrepare/X86/memcmp.ll  |  337 ++
 test/Transforms/ConstProp/sse.ll              |    2 +-
 test/Transforms/DCE/calls-errno.ll            |    4 +
 test/Transforms/GVNSink/sink-common-code.ll   |   57 +-
 test/Transforms/IRCE/correct-loop-info.ll     |  182 +
 .../IndVarSimplify/lftr_disabled.ll           |   28 +
 .../NVPTX/clone_constexpr.ll                  |   36 +
 test/Transforms/Inline/basictest.ll           |   24 +
 .../InstCombine/constant-fold-libfunc.ll      |   20 +
 .../InstCombine/insert-extract-shuffle.ll     |   23 +
 test/Transforms/InstCombine/intrinsics.ll     |  154 +-
 test/Transforms/InstCombine/lshr.ll           |   43 +-
 test/Transforms/InstSimplify/call.ll          |   33 +
 test/Transforms/InstSimplify/compare.ll       |    2 +-
 .../InstSimplify/simplify-nested-bitcast.ll   |   54 +
 test/Transforms/InstSimplify/vector_gep.ll    |    2 +-
 .../X86/interleaved-accesses-64bits-avx.ll    |   61 +-
 .../LoopIdiom/X86/unordered-atomic-memcpy.ll  |  452 +++
 .../unordered-atomic-memcpy-noarch.ll         |   28 +
 .../LoopStrengthReduce/X86/canonical.ll       |    2 +-
 .../LoopStrengthReduce/X86/ivchain-X86.ll     |    4 +-
 .../X86/lsr-expand-quadratic.ll               |   14 +-
 .../LoopStrengthReduce/X86/lsr-insns-1.ll     |    4 +-
 .../LoopStrengthReduce/X86/lsr-insns-2.ll     |    4 +-
 .../LoopStrengthReduce/X86/nested-loop.ll     |   22 +-
 .../AArch64/loop-vectorization-factors.ll     |   46 +-
 .../LowerExpectIntrinsic/PR33346.ll           |   22 +
 test/Transforms/LowerTypeTests/simple.ll      |   16 +-
 test/Transforms/LowerTypeTests/simplify.ll    |   37 +
 test/Transforms/NewGVN/completeness.ll        |    2 +-
 test/Transforms/NewGVN/loadforward.ll         |    4 +-
 test/Transforms/NewGVN/pr32403.ll             |    3 +-
 test/Transforms/NewGVN/pr32897.ll             |    1 -
 test/Transforms/NewGVN/pr33187.ll             |  148 +
 test/Transforms/SLPVectorizer/X86/arith-fp.ll |   48 +-
 .../X86/reverse_extract_elements.ll           |  138 +
 test/Transforms/SROA/address-spaces.ll        |   28 +
 .../SampleProfile/Inputs/indirect-call.prof   |    3 +
 .../Transforms/SampleProfile/indirect-call.ll |   13 +
 test/Transforms/Sink/badloadsink.ll           |   18 +
 test/Transforms/ThinLTOBitcodeWriter/split.ll |    4 +
 .../Util/PredicateInfo/condprop2.ll           |    2 +-
 .../Util/PredicateInfo/testandor2.ll          |    2 +-
 test/lit.cfg                                  |    3 +
 test/lit.site.cfg.in                          |    1 +
 .../llvm-cvtres/Inputs/test_resource.obj.coff |  Bin 0 -> 3468 bytes
 test/tools/llvm-cvtres/object.test            |  229 ++
 .../llvm-cvtres/{resource.test => parse.test} |    2 +-
 test/tools/llvm-dwarfdump/X86/brief.s         |  131 +
 test/tools/llvm-dwarfdump/X86/lit.local.cfg   |    2 +
 test/tools/llvm-pdbdump/class-layout.test     |    2 +-
 .../complex-padding-graphical.test            |    2 +-
 test/tools/llvm-pdbdump/enum-layout.test      |    2 +-
 test/tools/llvm-pdbdump/load-address.test     |    4 +-
 test/tools/llvm-pdbdump/raw-stream-data.test  |    6 +-
 test/tools/llvm-pdbdump/regex-filter.test     |   20 +-
 .../simple-padding-graphical.test             |    2 +-
 test/tools/llvm-pdbdump/symbol-filters.test   |   16 +-
 .../Inputs/trivial.elf-amdhsa-kaveri          |  Bin 13208 -> 0 bytes
 .../Inputs/trivial.obj.elf-amdhsa-gfx803      |  Bin 0 -> 2208 bytes
 .../llvm-readobj/amdgpu-elf-definitions.test  |   11 +
 test/tools/llvm-readobj/amdgpu-elf-defs.test  |   28 -
 test/tools/llvm-readobj/elf-sec-flags.test    |   29 +-
 tools/LLVMBuild.txt                           |    2 +-
 tools/bugpoint/OptimizerDriver.cpp            |    5 +-
 tools/dsymutil/DwarfLinker.cpp                |    9 +-
 tools/llc/llc.cpp                             |   95 +-
 tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp     |    3 +
 tools/llvm-cvtres/LLVMBuild.txt               |    2 +-
 tools/llvm-cvtres/llvm-cvtres.cpp             |   73 +-
 tools/llvm-cvtres/llvm-cvtres.h               |    2 -
 tools/llvm-dwarfdump/llvm-dwarfdump.cpp       |    4 +
 tools/llvm-lto2/llvm-lto2.cpp                 |    2 +
 tools/llvm-mc/llvm-mc.cpp                     |   22 +-
 tools/llvm-nm/llvm-nm.cpp                     |    2 +-
 tools/llvm-objdump/MachODump.cpp              |    7 +-
 .../llvm-pdbdump/C13DebugFragmentVisitor.cpp  |   87 -
 tools/llvm-pdbdump/C13DebugFragmentVisitor.h  |   60 -
 .../Analyze.cpp                               |    0
 .../{llvm-pdbdump => llvm-pdbutil}/Analyze.h  |    0
 .../CMakeLists.txt                            |    5 +-
 .../CompactTypeDumpVisitor.cpp                |    0
 .../CompactTypeDumpVisitor.h                  |    0
 tools/{llvm-pdbdump => llvm-pdbutil}/Diff.cpp |    2 +-
 tools/{llvm-pdbdump => llvm-pdbutil}/Diff.h   |    0
 .../LLVMBuild.txt                             |    4 +-
 .../LLVMOutputStyle.cpp                       |  308 +-
 .../LLVMOutputStyle.h                         |    0
 .../LinePrinter.cpp                           |    2 +-
 .../LinePrinter.h                             |    0
 .../OutputStyle.h                             |    0
 .../PdbYaml.cpp                               |    0
 .../{llvm-pdbdump => llvm-pdbutil}/PdbYaml.h  |    0
 .../PrettyBuiltinDumper.cpp                   |    2 +-
 .../PrettyBuiltinDumper.h                     |    0
 .../PrettyClassDefinitionDumper.cpp           |    2 +-
 .../PrettyClassDefinitionDumper.h             |    0
 .../PrettyClassLayoutGraphicalDumper.cpp      |    2 +-
 .../PrettyClassLayoutGraphicalDumper.h        |    0
 .../PrettyCompilandDumper.cpp                 |    4 +-
 .../PrettyCompilandDumper.h                   |    2 +-
 .../PrettyEnumDumper.cpp                      |    2 +-
 .../PrettyEnumDumper.h                        |    0
 .../PrettyExternalSymbolDumper.cpp            |    0
 .../PrettyExternalSymbolDumper.h              |    0
 .../PrettyFunctionDumper.cpp                  |    2 +-
 .../PrettyFunctionDumper.h                    |    0
 .../PrettyTypeDumper.cpp                      |  128 +-
 .../PrettyTypeDumper.h                        |    0
 .../PrettyTypedefDumper.cpp                   |    2 +-
 .../PrettyTypedefDumper.h                     |    2 +-
 .../PrettyVariableDumper.cpp                  |    2 +-
 .../PrettyVariableDumper.h                    |    0
 .../StreamUtil.cpp                            |    0
 .../StreamUtil.h                              |    0
 .../YAMLOutputStyle.cpp                       |   75 +-
 .../YAMLOutputStyle.h                         |    0
 .../fuzzer/CMakeLists.txt                     |    6 +-
 .../fuzzer/llvm-pdbutil-fuzzer.cpp}           |    4 +-
 .../llvm-pdbutil.cpp}                         |  140 +-
 .../llvm-pdbutil.h}                           |   41 +-
 tools/llvm-readobj/CMakeLists.txt             |    1 +
 tools/llvm-readobj/COFFDumper.cpp             |    2 +-
 tools/llvm-readobj/COFFImportDumper.cpp       |    2 +-
 tools/llvm-readobj/ELFDumper.cpp              |  202 +-
 tools/llvm-readobj/LLVMBuild.txt              |    2 +-
 tools/llvm-stress/llvm-stress.cpp             |    3 +
 tools/llvm-xray/xray-extract.cc               |    2 +-
 tools/obj2yaml/dwarf2yaml.cpp                 |    1 +
 tools/obj2yaml/macho2yaml.cpp                 |    2 +-
 tools/yaml2obj/yaml2elf.cpp                   |    2 +-
 tools/yaml2obj/yaml2macho.cpp                 |    6 +-
 unittests/ADT/DAGDeltaAlgorithmTest.cpp       |    2 +-
 unittests/ADT/DeltaAlgorithmTest.cpp          |    2 +-
 unittests/ADT/DenseMapTest.cpp                |    2 +-
 unittests/ADT/DepthFirstIteratorTest.cpp      |    2 +-
 unittests/ADT/FoldingSet.cpp                  |    2 +-
 unittests/ADT/HashingTest.cpp                 |    2 +-
 unittests/ADT/ImmutableMapTest.cpp            |    2 +-
 unittests/ADT/ImmutableSetTest.cpp            |    2 +-
 unittests/ADT/IteratorTest.cpp                |    2 +-
 unittests/ADT/MapVectorTest.cpp               |    2 +-
 unittests/ADT/OptionalTest.cpp                |    2 +-
 unittests/ADT/PointerEmbeddedIntTest.cpp      |    2 +-
 unittests/ADT/PointerIntPairTest.cpp          |    2 +-
 unittests/ADT/PointerSumTypeTest.cpp          |    2 +-
 unittests/ADT/PointerUnionTest.cpp            |    2 +-
 unittests/ADT/PostOrderIteratorTest.cpp       |    2 +-
 unittests/ADT/RangeAdapterTest.cpp            |    2 +-
 unittests/ADT/ReverseIterationTest.cpp        |    2 +-
 unittests/ADT/SCCIteratorTest.cpp             |    2 +-
 unittests/ADT/SmallPtrSetTest.cpp             |    4 +-
 unittests/ADT/SmallVectorTest.cpp             |   63 +-
 unittests/ADT/TripleTest.cpp                  |    7 +-
 unittests/ADT/TwineTest.cpp                   |    2 +-
 unittests/ADT/VariadicFunctionTest.cpp        |    4 +-
 .../Analysis/BranchProbabilityInfoTest.cpp    |    2 +-
 unittests/Analysis/CFGTest.cpp                |    2 +-
 unittests/Analysis/CMakeLists.txt             |    1 +
 unittests/Analysis/GlobalsModRefTest.cpp      |   55 +
 unittests/Analysis/LazyCallGraphTest.cpp      |    2 +-
 unittests/Analysis/MemorySSA.cpp              |   48 +-
 unittests/Analysis/ProfileSummaryInfoTest.cpp |    2 +-
 unittests/Analysis/ScalarEvolutionTest.cpp    |    1 -
 unittests/Analysis/TBAATest.cpp               |    2 +-
 unittests/Analysis/UnrollAnalyzer.cpp         |    4 +-
 unittests/BinaryFormat/CMakeLists.txt         |    9 +
 .../{Support => BinaryFormat}/DwarfTest.cpp   |    5 +-
 unittests/BinaryFormat/TestFileMagic.cpp      |  128 +
 unittests/Bitcode/BitstreamReaderTest.cpp     |    2 +-
 unittests/Bitcode/BitstreamWriterTest.cpp     |    2 +-
 unittests/CMakeLists.txt                      |    1 +
 unittests/CodeGen/DIEHashTest.cpp             |    4 +-
 .../MachineInstrBundleIteratorTest.cpp        |    2 +-
 .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp    |    2 +-
 .../DebugInfo/DWARF/DWARFFormValueTest.cpp    |    2 +-
 unittests/DebugInfo/DWARF/DwarfGenerator.cpp  |    4 +-
 .../ExecutionEngine/ExecutionEngineTest.cpp   |    2 +-
 .../ExecutionEngine/MCJIT/MCJITCAPITest.cpp   |    2 +-
 .../MCJIT/MCJITMultipleModuleTest.cpp         |    2 +-
 unittests/ExecutionEngine/MCJIT/MCJITTest.cpp |    2 +-
 .../Orc/CompileOnDemandLayerTest.cpp          |    2 +-
 .../Orc/IndirectionUtilsTest.cpp              |    2 +-
 .../Orc/LazyEmittingLayerTest.cpp             |    2 +-
 .../Orc/ObjectTransformLayerTest.cpp          |    2 +-
 unittests/ExecutionEngine/Orc/OrcCAPITest.cpp |    2 +-
 unittests/ExecutionEngine/Orc/OrcTestCommon.h |    4 +-
 unittests/ExecutionEngine/Orc/QueueChannel.h  |    2 +-
 .../Orc/RTDyldObjectLinkingLayerTest.cpp      |    4 +-
 unittests/IR/AsmWriterTest.cpp                |    2 +-
 unittests/IR/ConstantRangeTest.cpp            |   17 +
 unittests/IR/ConstantsTest.cpp                |    4 +-
 unittests/IR/DebugTypeODRUniquingTest.cpp     |    2 +-
 unittests/IR/DominatorTreeTest.cpp            |   52 +
 unittests/IR/IRBuilderTest.cpp                |    2 +-
 unittests/IR/MetadataTest.cpp                 |    2 +-
 unittests/IR/ModuleTest.cpp                   |    2 +-
 unittests/IR/PassManagerTest.cpp              |    2 +-
 unittests/IR/PatternMatch.cpp                 |    2 +-
 unittests/IR/UserTest.cpp                     |    2 +-
 unittests/IR/ValueTest.cpp                    |    2 +-
 unittests/IR/VerifierTest.cpp                 |    4 +-
 unittests/Linker/LinkModulesTest.cpp          |    4 +-
 unittests/MC/DwarfLineTables.cpp              |    2 +-
 unittests/MC/StringTableBuilderTest.cpp       |    2 +-
 unittests/MI/LiveIntervalTest.cpp             |   15 +-
 unittests/ProfileData/CoverageMappingTest.cpp |    2 +-
 unittests/ProfileData/InstrProfTest.cpp       |    2 +-
 unittests/ProfileData/SampleProfTest.cpp      |    2 +-
 unittests/Support/ARMAttributeParser.cpp      |    2 +-
 unittests/Support/BinaryStreamTest.cpp        |    9 +-
 unittests/Support/CMakeLists.txt              |    6 +-
 unittests/Support/CommandLineTest.cpp         |    6 +-
 unittests/Support/CompressionTest.cpp         |    2 +-
 unittests/Support/CrashRecoveryTest.cpp       |    2 +-
 unittests/Support/DataExtractorTest.cpp       |    2 +-
 .../Support/DynamicLibrary/CMakeLists.txt     |   25 +-
 .../DynamicLibrary/DynamicLibraryTest.cpp     |   53 +-
 .../Support/DynamicLibrary/PipSqueak.cxx      |   42 +-
 unittests/Support/DynamicLibrary/PipSqueak.h  |   13 +
 unittests/Support/EndianStreamTest.cpp        |    2 +-
 unittests/Support/FileOutputBufferTest.cpp    |    2 +-
 unittests/Support/FormatVariadicTest.cpp      |    2 +-
 unittests/Support/LEB128Test.cpp              |    4 +-
 unittests/Support/MD5Test.cpp                 |    2 +-
 unittests/Support/MathExtrasTest.cpp          |    2 +-
 unittests/Support/MemoryBufferTest.cpp        |    2 +-
 unittests/Support/MemoryTest.cpp              |    2 +-
 unittests/Support/Path.cpp                    |   83 +-
 unittests/Support/ProgramTest.cpp             |    4 +-
 unittests/Support/SpecialCaseListTest.cpp     |    2 +-
 unittests/Support/SwapByteOrderTest.cpp       |    2 +-
 unittests/Support/TarWriterTest.cpp           |    2 +-
 unittests/Support/TargetParserTest.cpp        |    2 +-
 unittests/Support/TrigramIndexTest.cpp        |    2 +-
 unittests/Support/YAMLIOTest.cpp              |    1 -
 unittests/Support/YAMLParserTest.cpp          |    2 +-
 .../Support/formatted_raw_ostream_test.cpp    |    2 +-
 unittests/Support/raw_ostream_test.cpp        |    2 +-
 unittests/Support/raw_pwrite_stream_test.cpp  |    2 +-
 unittests/Support/raw_sha1_ostream_test.cpp   |    2 +-
 unittests/Target/AArch64/InstSizes.cpp        |   24 +-
 .../Transforms/Scalar/LoopPassManagerTest.cpp |    2 +-
 unittests/Transforms/Utils/CMakeLists.txt     |    1 +
 .../Transforms/Utils/FunctionComparator.cpp   |    2 +-
 .../Transforms/Utils/OrderedInstructions.cpp  |   65 +
 .../Transforms/Utils/ValueMapperTest.cpp      |    2 +-
 utils/FileCheck/FileCheck.cpp                 |    2 +-
 utils/TableGen/CMakeLists.txt                 |    1 -
 utils/TableGen/TableGen.cpp                   |    6 -
 utils/TableGen/TableGenBackends.h             |    1 -
 utils/TableGen/X86FoldTablesEmitter.cpp       |  732 ----
 utils/gdb-scripts/prettyprinters.py           |   36 +-
 utils/git-svn/git-llvm                        |    5 +
 utils/opt-viewer/optrecord.py                 |    2 +-
 utils/release/test-release.sh                 |   29 +-
 1717 files changed, 40056 insertions(+), 15366 deletions(-)
 create mode 100644 include/llvm/BinaryFormat/COFF.h
 rename include/llvm/{Support => BinaryFormat}/Dwarf.def (100%)
 rename include/llvm/{Support => BinaryFormat}/Dwarf.h (92%)
 rename include/llvm/{Support => BinaryFormat}/ELF.h (98%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/AArch64.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/AMDGPU.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/ARM.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/AVR.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/BPF.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Hexagon.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Lanai.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Mips.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/PowerPC.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/PowerPC64.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/RISCV.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/Sparc.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/SystemZ.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/WebAssembly.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/i386.def (100%)
 rename include/llvm/{Support => BinaryFormat}/ELFRelocs/x86_64.def (100%)
 rename include/llvm/{Support => BinaryFormat}/MachO.def (100%)
 create mode 100644 include/llvm/BinaryFormat/MachO.h
 create mode 100644 include/llvm/BinaryFormat/Magic.h
 rename include/llvm/{Support => BinaryFormat}/Wasm.h (80%)
 rename include/llvm/{Support => BinaryFormat}/WasmRelocs/WebAssembly.def (100%)
 delete mode 100644 include/llvm/CodeGen/MachineFunctionInitializer.h
 create mode 100644 include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
 create mode 100644 include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
 create mode 100644 include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
 rename {lib/Target/AMDGPU/MCTargetDesc => include/llvm/Support}/AMDGPUCodeObjectMetadata.h (98%)
 delete mode 100644 include/llvm/Support/COFF.h
 delete mode 100644 include/llvm/Support/MachO.h
 create mode 100644 include/llvm/Transforms/Utils/OrderedInstructions.h
 create mode 100644 lib/BinaryFormat/CMakeLists.txt
 rename lib/{Support => BinaryFormat}/Dwarf.cpp (77%)
 create mode 100644 lib/BinaryFormat/LLVMBuild.txt
 create mode 100644 lib/BinaryFormat/Magic.cpp
 create mode 100644 lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
 create mode 100644 lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
 create mode 100644 lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
 create mode 100644 lib/Support/AMDGPUCodeObjectMetadata.cpp
 create mode 100644 lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
 create mode 100644 lib/Transforms/Utils/OrderedInstructions.cpp
 create mode 100644 test/Analysis/BranchProbabilityInfo/libfunc_call.ll
 create mode 100644 test/BugPoint/unsymbolized.ll
 create mode 100644 test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
 create mode 100644 test/CodeGen/AArch64/spill-undef.mir
 create mode 100644 test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
 create mode 100644 test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
 create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll
 create mode 100644 test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
 create mode 100644 test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
 create mode 100644 test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
 create mode 100644 test/CodeGen/ARM/clang-section.ll
 create mode 100644 test/CodeGen/Hexagon/common-gep-inbounds.ll
 create mode 100644 test/CodeGen/Hexagon/mux-undef.ll
 delete mode 100644 test/CodeGen/MIR/Generic/function-missing-machine-function.mir
 create mode 100644 test/CodeGen/MIR/X86/empty0.mir
 create mode 100644 test/CodeGen/MIR/X86/empty1.mir
 create mode 100644 test/CodeGen/MIR/X86/empty2.mir
 create mode 100644 test/CodeGen/MIR/X86/inline-asm.mir
 create mode 100644 test/CodeGen/MIR/X86/roundtrip.mir
 create mode 100644 test/CodeGen/Mips/cconv/vector.ll
 create mode 100644 test/CodeGen/Mips/pbqp-reserved-physreg.ll
 create mode 100644 test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesinesc.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesinesi.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesinesll.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesiness.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesineuc.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesineui.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesineull.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesineus.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesllnesll.ll
 create mode 100644 test/CodeGen/PowerPC/testComparesllneull.ll
 create mode 100644 test/CodeGen/PowerPC/vec_int_ext.ll
 delete mode 100644 test/CodeGen/X86/GlobalISel/irtranslator-call.ll
 delete mode 100644 test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
 create mode 100644 test/CodeGen/X86/mul-constant-result.ll
 create mode 100644 test/CodeGen/X86/pr32659.ll
 create mode 100644 test/CodeGen/X86/selectiondag-dominator.ll
 create mode 100644 test/CodeGen/X86/vector-shuffle-v48.ll
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets.s
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.o
 create mode 100644 test/DebugInfo/PDB/Inputs/debug-subsections.yaml
 delete mode 100644 test/DebugInfo/PDB/Inputs/simple-line-info.yaml
 create mode 100644 test/DebugInfo/PDB/pdbdump-debug-subsections.test
 delete mode 100644 test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test
 delete mode 100644 test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
 create mode 100644 test/DebugInfo/dwarfdump-str-offsets-invalid.test
 create mode 100644 test/DebugInfo/dwarfdump-str-offsets.test
 create mode 100644 test/LTO/ARM/Inputs/thumb.ll
 create mode 100644 test/LTO/ARM/link-arm-and-thumb.ll
 create mode 100644 test/LTO/Resolution/X86/linker-redef.ll
 create mode 100644 test/Linker/Inputs/thumb.ll
 create mode 100644 test/Linker/link-arm-and-thumb.ll
 create mode 100644 test/MC/ARM/arm-thumb-tail-call.ll
 create mode 100644 test/MC/ARM/t2-modified-immediate-fixup-error1.s
 create mode 100644 test/MC/ARM/t2-modified-immediate-fixup-error2.s
 create mode 100644 test/MC/ARM/t2-modified-immediate-fixup.s
 create mode 100644 test/MC/AsmParser/empty-comment.s
 create mode 100644 test/MC/ELF/ARM/clang-section.s
 create mode 100644 test/MC/MachO/alias.s
 create mode 100644 test/Object/objc-imageinfo-coff.ll
 create mode 100644 test/Object/objc-imageinfo-elf.ll
 create mode 100644 test/Object/objc-imageinfo-macho.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/memcmp.ll
 create mode 100644 test/Transforms/IRCE/correct-loop-info.ll
 create mode 100644 test/Transforms/IndVarSimplify/lftr_disabled.ll
 create mode 100644 test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
 create mode 100644 test/Transforms/InstCombine/constant-fold-libfunc.ll
 create mode 100644 test/Transforms/InstSimplify/simplify-nested-bitcast.ll
 create mode 100644 test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
 create mode 100644 test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
 create mode 100644 test/Transforms/LowerExpectIntrinsic/PR33346.ll
 create mode 100644 test/Transforms/LowerTypeTests/simplify.ll
 create mode 100644 test/Transforms/NewGVN/pr33187.ll
 create mode 100644 test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
 create mode 100644 test/Transforms/Sink/badloadsink.ll
 create mode 100644 test/tools/llvm-cvtres/Inputs/test_resource.obj.coff
 create mode 100644 test/tools/llvm-cvtres/object.test
 rename test/tools/llvm-cvtres/{resource.test => parse.test} (93%)
 create mode 100644 test/tools/llvm-dwarfdump/X86/brief.s
 create mode 100644 test/tools/llvm-dwarfdump/X86/lit.local.cfg
 delete mode 100755 test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri
 create mode 100644 test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803
 create mode 100644 test/tools/llvm-readobj/amdgpu-elf-definitions.test
 delete mode 100644 test/tools/llvm-readobj/amdgpu-elf-defs.test
 delete mode 100644 tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
 delete mode 100644 tools/llvm-pdbdump/C13DebugFragmentVisitor.h
 rename tools/{llvm-pdbdump => llvm-pdbutil}/Analyze.cpp (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/Analyze.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/CMakeLists.txt (89%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/CompactTypeDumpVisitor.cpp (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/CompactTypeDumpVisitor.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/Diff.cpp (99%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/Diff.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/LLVMBuild.txt (88%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/LLVMOutputStyle.cpp (75%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/LLVMOutputStyle.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/LinePrinter.cpp (99%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/LinePrinter.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/OutputStyle.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PdbYaml.cpp (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PdbYaml.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyBuiltinDumper.cpp (98%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyBuiltinDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassDefinitionDumper.cpp (99%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassDefinitionDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassLayoutGraphicalDumper.cpp (99%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyClassLayoutGraphicalDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyCompilandDumper.cpp (98%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyCompilandDumper.h (95%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyEnumDumper.cpp (98%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyEnumDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyExternalSymbolDumper.cpp (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyExternalSymbolDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyFunctionDumper.cpp (99%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyFunctionDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypeDumper.cpp (68%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypeDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypedefDumper.cpp (99%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyTypedefDumper.h (94%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyVariableDumper.cpp (99%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/PrettyVariableDumper.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/StreamUtil.cpp (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/StreamUtil.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/YAMLOutputStyle.cpp (84%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/YAMLOutputStyle.h (100%)
 rename tools/{llvm-pdbdump => llvm-pdbutil}/fuzzer/CMakeLists.txt (53%)
 rename tools/{llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp => llvm-pdbutil/fuzzer/llvm-pdbutil-fuzzer.cpp} (96%)
 rename tools/{llvm-pdbdump/llvm-pdbdump.cpp => llvm-pdbutil/llvm-pdbutil.cpp} (90%)
 rename tools/{llvm-pdbdump/llvm-pdbdump.h => llvm-pdbutil/llvm-pdbutil.h} (85%)
 create mode 100644 unittests/Analysis/GlobalsModRefTest.cpp
 create mode 100644 unittests/BinaryFormat/CMakeLists.txt
 rename unittests/{Support => BinaryFormat}/DwarfTest.cpp (97%)
 create mode 100644 unittests/BinaryFormat/TestFileMagic.cpp
 create mode 100644 unittests/Transforms/Utils/OrderedInstructions.cpp
 delete mode 100644 utils/TableGen/X86FoldTablesEmitter.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a5b96569f9c6..431785d3dd0c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -385,6 +385,7 @@ else()
   option(LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY "Compile with -fmodules-local-submodule-visibility." ON)
 endif()
 option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF)
+option(LLVM_ENABLE_CXX1Z "Compile with C++1z enabled." OFF)
 option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF)
 option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF)
 option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
@@ -853,7 +854,6 @@ if( LLVM_INCLUDE_UTILS )
   add_subdirectory(utils/not)
   add_subdirectory(utils/llvm-lit)
   add_subdirectory(utils/yaml-bench)
-  add_subdirectory(utils/unittest)
 else()
   if ( LLVM_INCLUDE_TESTS )
     message(FATAL_ERROR "Including tests when not building utils will not work.
@@ -897,6 +897,10 @@ if( LLVM_INCLUDE_TESTS )
   endif()
   add_subdirectory(test)
   add_subdirectory(unittests)
+  if( LLVM_INCLUDE_UTILS )
+    add_subdirectory(utils/unittest)
+  endif()
+
   if (WIN32)
     # This utility is used to prevent crashing tests from calling Dr. Watson on
     # Windows.
diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go
index fe191beb3813..222097034307 100644
--- a/bindings/go/llvm/ir.go
+++ b/bindings/go/llvm/ir.go
@@ -611,6 +611,12 @@ func (t Type) StructElementTypes() []Type {
 }
 
 // Operations on array, pointer, and vector types (sequence types)
+func (t Type) Subtypes() (ret []Type) {
+	ret = make([]Type, C.LLVMGetNumContainedTypes(t.C))
+	C.LLVMGetSubtypes(t.C, llvmTypeRefPtr(&ret[0]))
+	return
+}
+
 func ArrayType(elementType Type, elementCount int) (t Type) {
 	t.C = C.LLVMArrayType(elementType.C, C.unsigned(elementCount))
 	return
diff --git a/bindings/go/llvm/ir_test.go b/bindings/go/llvm/ir_test.go
index c823615a4293..325ee4890f4c 100644
--- a/bindings/go/llvm/ir_test.go
+++ b/bindings/go/llvm/ir_test.go
@@ -134,3 +134,29 @@ func TestDebugLoc(t *testing.T) {
 		t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C)
 	}
 }
+
+func TestSubtypes(t *testing.T) {
+	cont := NewContext()
+	defer cont.Dispose()
+
+	int_pointer := PointerType(cont.Int32Type(), 0)
+	int_inner := int_pointer.Subtypes()
+	if len(int_inner) != 1 {
+		t.Fatalf("Got size %d, though wanted 1", len(int_inner)) // stop: indexing below needs 1 element
+	}
+	if int_inner[0] != cont.Int32Type() {
+		t.Errorf("Expected int32 type")
+	}
+
+	st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false)
+	st_inner := st_pointer.Subtypes()
+	if len(st_inner) != 2 {
+		t.Fatalf("Got size %d, though wanted 2", len(st_inner)) // stop: indexing below needs 2 elements
+	}
+	if st_inner[0] != cont.Int32Type() {
+		t.Errorf("Expected first struct field to be int32")
+	}
+	if st_inner[1] != cont.Int8Type() {
+		t.Errorf("Expected second struct field to be int8")
+	}
+}
diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml
index 399fd2d27c20..6e8ca662ef67 100644
--- a/bindings/ocaml/llvm/llvm.ml
+++ b/bindings/ocaml/llvm/llvm.ml
@@ -459,6 +459,8 @@ external is_packed : lltype -> bool = "llvm_is_packed"
 external is_opaque : lltype -> bool = "llvm_is_opaque"
 
 (*--... Operations on pointer, vector, and array types .....................--*)
+
+external subtypes : lltype -> lltype array = "llvm_subtypes"
 external array_type : lltype -> int -> lltype = "llvm_array_type"
 external pointer_type : lltype -> lltype = "llvm_pointer_type"
 external qualified_pointer_type : lltype -> int -> lltype
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index 4068126e2cbf..c422e78f5d2d 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -658,6 +658,9 @@ val is_opaque : lltype -> bool
 
 (** {7 Operations on pointer, vector, and array types} *)
 
+(** [subtypes ty] returns [ty]'s subtypes *)
+val subtypes : lltype -> lltype array
+
 (** [array_type ty n] returns the array type containing [n] elements of type
     [ty]. See the method [llvm::ArrayType::get]. *)
 val array_type : lltype -> int -> lltype
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index af04ea25c8ab..4b6d1c5072bc 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -506,6 +506,20 @@ CAMLprim value llvm_is_opaque(LLVMTypeRef StructTy) {
 
 /*--... Operations on array, pointer, and vector types .....................--*/
 
+/* lltype -> lltype array */
+CAMLprim value llvm_subtypes(LLVMTypeRef Ty) {
+  CAMLparam0();
+  CAMLlocal1(Result);
+
+  /* Allocate a tag-0 block with one field per contained type, then pass it
+     to LLVMGetSubtypes as the destination LLVMTypeRef array. */
+  unsigned Count = LLVMGetNumContainedTypes(Ty);
+  Result = caml_alloc(Count, 0);
+  LLVMGetSubtypes(Ty, (LLVMTypeRef *) Result);
+
+  CAMLreturn(Result);
+}
+
 /* lltype -> int -> lltype */
 CAMLprim LLVMTypeRef llvm_array_type(LLVMTypeRef ElementTy, value Count) {
   return LLVMArrayType(ElementTy, Int_val(Count));
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 3dd16d51f0b7..c3325db11788 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -101,6 +101,10 @@ else()
   message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!")
 endif()
 
+if( LLVM_REVERSE_ITERATION )
+  set( LLVM_ENABLE_REVERSE_ITERATION 1 )
+endif()
+
 if(WIN32)
   set(LLVM_HAVE_LINK_VERSION_SCRIPT 0)
   if(CYGWIN)
@@ -381,6 +385,9 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
   if (LLVM_ENABLE_CXX1Y)
     check_cxx_compiler_flag("-std=c++1y" CXX_SUPPORTS_CXX1Y)
     append_if(CXX_SUPPORTS_CXX1Y "-std=c++1y" CMAKE_CXX_FLAGS)
+  elseif(LLVM_ENABLE_CXX1Z)
+    check_cxx_compiler_flag("-std=c++1z" CXX_SUPPORTS_CXX1Z)
+    append_if(CXX_SUPPORTS_CXX1Z "-std=c++1z" CMAKE_CXX_FLAGS)
   else()
     check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
     if (CXX_SUPPORTS_CXX11)
diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake
index da0858e54d44..17ae1c9e7717 100644
--- a/cmake/modules/TableGen.cmake
+++ b/cmake/modules/TableGen.cmake
@@ -30,19 +30,43 @@ function(tablegen project ofn)
     endif()
   endif()
 
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
-    # Generate tablegen output in a temporary file.
-    COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
-    ${LLVM_TABLEGEN_FLAGS} 
-    ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
-    -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
-    # The file in LLVM_TARGET_DEFINITIONS may be not in the current
-    # directory and local_tds may not contain it, so we must
-    # explicitly list it here:
-    DEPENDS ${${project}_TABLEGEN_TARGET} ${local_tds} ${global_tds}
-    ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
-    COMMENT "Building ${ofn}..."
-    )
+  # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list
+  # (both the target and the file) to have .inc files rebuilt on
+  # a tablegen change, as cmake does not propagate file-level dependencies
+  # of custom targets. See the following ticket for more information:
+  # https://cmake.org/Bug/view.php?id=15858
+  # We could always have just one dependency on both the target and
+  # the file, but these 2 cases would produce cleaner cmake files.
+  if (${${project}_TABLEGEN_TARGET} STREQUAL ${${project}_TABLEGEN_EXE})
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+      # Generate tablegen output in a temporary file.
+      COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
+      ${LLVM_TABLEGEN_FLAGS}
+      ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
+      -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+      # The file in LLVM_TARGET_DEFINITIONS may be not in the current
+      # directory and local_tds may not contain it, so we must
+      # explicitly list it here:
+      DEPENDS ${${project}_TABLEGEN_TARGET} ${local_tds} ${global_tds}
+      ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
+      COMMENT "Building ${ofn}..."
+      )
+  else()
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+      # Generate tablegen output in a temporary file.
+      COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
+      ${LLVM_TABLEGEN_FLAGS}
+      ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
+      -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
+      # The file in LLVM_TARGET_DEFINITIONS may be not in the current
+      # directory and local_tds may not contain it, so we must
+      # explicitly list it here:
+      DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE}
+        ${local_tds} ${global_tds}
+      ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
+      COMMENT "Building ${ofn}..."
+      )
+  endif()
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
     # Only update the real output file if there are any differences.
     # This prevents recompilation of all the files depending on it if there
diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst
index 81c067b317d3..caa697ca28cd 100644
--- a/docs/AMDGPUUsage.rst
+++ b/docs/AMDGPUUsage.rst
@@ -1,109 +1,3441 @@
-==============================
-User Guide for AMDGPU Back-end
-==============================
+=============================
+User Guide for AMDGPU Backend
+=============================
+
+.. contents::
+   :local:
 
 Introduction
 ============
 
-The AMDGPU back-end provides ISA code generation for AMD GPUs, starting with
-the R600 family up until the current Volcanic Islands (GCN Gen 3).
+The AMDGPU backend provides ISA code generation for AMD GPUs, starting with the
+R600 family up until the current GCN families. It lives in the
+``lib/Target/AMDGPU`` directory.
 
-Refer to `AMDGPU section in Architecture & Platform Information for Compiler Writers <CompilerWriterInfo.html#amdgpu>`_
-for additional documentation.
+LLVM
+====
 
-Conventions
-===========
+.. _amdgpu-target-triples:
+
+Target Triples
+--------------
+
+Use the ``clang -target <Architecture>-<Vendor>-<OS>-<Environment>`` option to
+specify the target triple:
+
+  .. table:: AMDGPU Target Triples
+     :name: amdgpu-target-triples-table
+
+     ============ ======== ========= ===========
+     Architecture Vendor   OS        Environment
+     ============ ======== ========= ===========
+     r600         amd      <empty>   <empty>
+     amdgcn       amd      <empty>   <empty>
+     amdgcn       amd      amdhsa    <empty>
+     amdgcn       amd      amdhsa    opencl
+     amdgcn       amd      amdhsa    amdgizcl
+     amdgcn       amd      amdhsa    amdgiz
+     amdgcn       amd      amdhsa    hcc
+     ============ ======== ========= ===========
+
+``r600-amd--``
+  Supports AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders executed on
+  the MESA runtime.
+
+``amdgcn-amd--``
+  Supports AMD GCN GPUs GFX6 onwards for graphics and compute shaders executed
+  on the MESA runtime.
+
+``amdgcn-amd-amdhsa-``
+  Supports AMD GCN GPUs GFX6 onwards for compute kernels executed on HSA [HSA]_
+  compatible runtimes such as AMD's ROCm [AMD-ROCm]_.
+
+``amdgcn-amd-amdhsa-opencl``
+  Supports AMD GCN GPUs GFX6 onwards for OpenCL compute kernels executed on HSA
+  [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See
+  :ref:`amdgpu-opencl`.
+
+``amdgcn-amd-amdhsa-amdgizcl``
+  Same as ``amdgcn-amd-amdhsa-opencl`` except a different address space mapping
+  is used (see :ref:`amdgpu-address-spaces`).
+
+``amdgcn-amd-amdhsa-amdgiz``
+  Same as ``amdgcn-amd-amdhsa-`` except a different address space mapping is
+  used (see :ref:`amdgpu-address-spaces`).
+
+``amdgcn-amd-amdhsa-hcc``
+  Supports AMD GCN GPUs GFX6 onwards for AMD HC language compute kernels
+  executed on HSA [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See
+  :ref:`amdgpu-hcc`.
+
+.. _amdgpu-processors:
+
+Processors
+----------
+
+Use the ``clang -mcpu <Processor>`` option to specify the AMD GPU processor. The
+names from both the *Processor* and *Alternative Processor* columns can be used.
+
+  .. table:: AMDGPU Processors
+     :name: amdgpu-processors-table
+
+     ========== =========== ============ ===== ======= ==================
+     Processor  Alternative Target       dGPU/ Runtime Example
+                Processor   Triple       APU   Support Products
+                            Architecture
+     ========== =========== ============ ===== ======= ==================
+     **R600** [AMD-R6xx]_
+     --------------------------------------------------------------------
+     r600                   r600         dGPU
+     r630                   r600         dGPU
+     rs880                  r600         dGPU
+     rv670                  r600         dGPU
+     **R700** [AMD-R7xx]_
+     --------------------------------------------------------------------
+     rv710                  r600         dGPU
+     rv730                  r600         dGPU
+     rv770                  r600         dGPU
+     **Evergreen** [AMD-Evergreen]_
+     --------------------------------------------------------------------
+     cedar                  r600         dGPU
+     redwood                r600         dGPU
+     sumo                   r600         dGPU
+     juniper                r600         dGPU
+     cypress                r600         dGPU
+     **Northern Islands** [AMD-Cayman-Trinity]_
+     --------------------------------------------------------------------
+     barts                  r600         dGPU
+     turks                  r600         dGPU
+     caicos                 r600         dGPU
+     cayman                 r600         dGPU
+     **GCN GFX6 (Southern Islands (SI))** [AMD-Souther-Islands]_
+     --------------------------------------------------------------------
+     gfx600     - SI        amdgcn       dGPU
+                - tahiti
+     gfx601     - pitcairn  amdgcn       dGPU
+                - verde
+                - oland
+                - hainan
+     **GCN GFX7 (Sea Islands (CI))** [AMD-Sea-Islands]_
+     --------------------------------------------------------------------
+     gfx700     - bonaire   amdgcn       dGPU          - Radeon HD 7790
+                                                       - Radeon HD 8770
+                                                       - R7 260
+                                                       - R7 260X
+     \          - kaveri    amdgcn       APU           - A6-7000
+                                                       - A6 Pro-7050B
+                                                       - A8-7100
+                                                       - A8 Pro-7150B
+                                                       - A10-7300
+                                                       - A10 Pro-7350B
+                                                       - FX-7500
+                                                       - A8-7200P
+                                                       - A10-7400P
+                                                       - FX-7600P
+     gfx701     - hawaii    amdgcn       dGPU  ROCm    - FirePro W8100
+                                                       - FirePro W9100
+                                                       - FirePro S9150
+                                                       - FirePro S9170
+     gfx702                              dGPU  ROCm    - Radeon R9 290
+                                                       - Radeon R9 290x
+                                                       - Radeon R9 390
+                                                       - Radeon R9 390x
+     gfx703     - kabini    amdgcn       APU           - E1-2100
+                - mullins                              - E1-2200
+                                                       - E1-2500
+                                                       - E2-3000
+                                                       - E2-3800
+                                                       - A4-5000
+                                                       - A4-5100
+                                                       - A6-5200
+                                                       - A4 Pro-3340B
+     **GCN GFX8 (Volcanic Islands (VI))** [AMD-Volcanic-Islands]_
+     --------------------------------------------------------------------
+     gfx800     - iceland   amdgcn       dGPU          - FirePro S7150
+                                                       - FirePro S7100
+                                                       - FirePro W7100
+                                                       - Radeon R9 285
+                                                       - Radeon R9 380
+                                                       - Radeon R9 385
+                                                       - Mobile FirePro
+                                                         M7170
+     gfx801     - carrizo   amdgcn       APU           - A6-8500P
+                                                       - Pro A6-8500B
+                                                       - A8-8600P
+                                                       - Pro A8-8600B
+                                                       - FX-8800P
+                                                       - Pro A12-8800B
+     \                      amdgcn       APU   ROCm    - A10-8700P
+                                                       - Pro A10-8700B
+                                                       - A10-8780P
+     \                      amdgcn       APU           - A10-9600P
+                                                       - A10-9630P
+                                                       - A12-9700P
+                                                       - A12-9730P
+                                                       - FX-9800P
+                                                       - FX-9830P
+     \                      amdgcn       APU           - E2-9010
+                                                       - A6-9210
+                                                       - A9-9410
+     gfx802     - tonga     amdgcn       dGPU  ROCm    Same as gfx800
+     gfx803     - fiji      amdgcn       dGPU  ROCm    - Radeon R9 Nano
+                                                       - Radeon R9 Fury
+                                                       - Radeon R9 FuryX
+                                                       - Radeon Pro Duo
+                                                       - FirePro S9300x2
+     \          - polaris10 amdgcn       dGPU  ROCm    - Radeon RX 470
+                                                       - Radeon RX 480
+     \          - polaris11 amdgcn       dGPU  ROCm    - Radeon RX 460
+     gfx804                 amdgcn       dGPU          Same as gfx803
+     gfx810     - stoney    amdgcn       APU
+     **GCN GFX9**
+     --------------------------------------------------------------------
+     gfx900                 amdgcn       dGPU          - FirePro W9500
+                                                       - FirePro S9500
+                                                       - FirePro S9500x2
+     gfx901                 amdgcn       dGPU  ROCm    Same as gfx900
+                                                       except XNACK is
+                                                       enabled
+     gfx902                 amdgcn       APU           *TBA*
+
+                                                       .. TODO
+                                                          Add product
+                                                          names.
+     gfx903                 amdgcn       APU           Same as gfx902
+                                                       except XNACK is
+                                                       enabled
+     ========== =========== ============ ===== ======= ==================
+
+.. _amdgpu-address-spaces:
 
 Address Spaces
 --------------
 
-The AMDGPU back-end uses the following address space mapping:
+The AMDGPU backend uses the following address space mappings.
 
-   ================== =================== ==============
-   LLVM Address Space DWARF Address Space Memory Space
-   ================== =================== ==============
-   0                  1                   Private
-   1                  N/A                 Global
-   2                  N/A                 Constant
-   3                  2                   Local
-   4                  N/A                 Generic (Flat)
-   5                  N/A                 Region
-   ================== =================== ==============
+The memory space names used in the table, aside from the region memory space,
+are from the OpenCL standard.
 
-The terminology in the table, aside from the region memory space, is from the
-OpenCL standard.
+LLVM Address Space number is used throughout LLVM (for example, in LLVM IR).
 
-LLVM Address Space is used throughout LLVM (for example, in LLVM IR). DWARF
-Address Space is emitted in DWARF, and is used by tools, such as debugger,
-profiler and others.
+  .. table:: Address Space Mapping
+     :name: amdgpu-address-space-mapping-table
+
+     ================== ================= ================= ================= =================
+     LLVM Address Space Memory Space
+     ------------------ -----------------------------------------------------------------------
+     \                  Current Default   amdgiz/amdgizcl   hcc               Future Default
+     ================== ================= ================= ================= =================
+     0                  Private (Scratch) Generic (Flat)    Generic (Flat)    Generic (Flat)
+     1                  Global            Global            Global            Global
+     2                  Constant          Constant          Constant          Region (GDS)
+     3                  Local (group/LDS) Local (group/LDS) Local (group/LDS) Local (group/LDS)
+     4                  Generic (Flat)    Region (GDS)      Region (GDS)      Constant
+     5                  Region (GDS)      Private (Scratch) Private (Scratch) Private (Scratch)
+     ================== ================= ================= ================= =================
+
+Current Default
+  This is the current default address space mapping used for all languages
+  except hcc. This will shortly be deprecated.
+
+amdgiz/amdgizcl
+  This is the current address space mapping used when ``amdgiz`` or ``amdgizcl``
+  is specified as the target triple environment value.
+
+hcc
+  This is the current address space mapping used when ``hcc`` is specified as
+  the target triple environment value. This will shortly be deprecated.
+
+Future Default
+  This will shortly be the only address space mapping for all languages using
+  the AMDGPU backend.
+
+.. _amdgpu-memory-scopes:
+
+Memory Scopes
+-------------
+
+This section provides LLVM memory synchronization scopes supported by the AMDGPU
+backend memory model when the target triple OS is ``amdhsa`` (see
+:ref:`amdgpu-amdhsa-memory-model` and :ref:`amdgpu-target-triples`).
+
+The memory model supported is based on the HSA memory model [HSA]_ which is
+based in turn on HRF-indirect with scope inclusion [HRF]_. The happens-before
+relation is transitive over the synchronizes-with relation independent of
+scope, and synchronizes-with allows the memory scope instances to be inclusive
+(see table :ref:`amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table`).
+
+This is different to the OpenCL [OpenCL]_ memory model which does not have scope
+inclusion and requires the memory scopes to exactly match. However, this
+is conservatively correct for OpenCL.
+
+  .. table:: AMDHSA LLVM Sync Scopes
+     :name: amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table
+
+     ================ ==========================================================
+     LLVM Sync Scope  Description
+     ================ ==========================================================
+     *none*           The default: ``system``.
+
+                      Synchronizes with, and participates in modification and
+                      seq_cst total orderings with, other operations (except
+                      image operations) for all address spaces (except private,
+                      or generic that accesses private) provided the other
+                      operation's sync scope is:
+
+                      - ``system``.
+                      - ``agent`` and executed by a thread on the same agent.
+                      - ``workgroup`` and executed by a thread in the same
+                        workgroup.
+                      - ``wavefront`` and executed by a thread in the same
+                        wavefront.
+
+     ``agent``        Synchronizes with, and participates in modification and
+                      seq_cst total orderings with, other operations (except
+                      image operations) for all address spaces (except private,
+                      or generic that accesses private) provided the other
+                      operation's sync scope is:
+
+                      - ``system`` or ``agent`` and executed by a thread on the
+                        same agent.
+                      - ``workgroup`` and executed by a thread in the same
+                        workgroup.
+                      - ``wavefront`` and executed by a thread in the same
+                        wavefront.
+
+     ``workgroup``    Synchronizes with, and participates in modification and
+                      seq_cst total orderings with, other operations (except
+                      image operations) for all address spaces (except private,
+                      or generic that accesses private) provided the other
+                      operation's sync scope is:
+
+                      - ``system``, ``agent`` or ``workgroup`` and executed by a
+                        thread in the same workgroup.
+                      - ``wavefront`` and executed by a thread in the same
+                        wavefront.
+
+     ``wavefront``    Synchronizes with, and participates in modification and
+                      seq_cst total orderings with, other operations (except
+                      image operations) for all address spaces (except private,
+                      or generic that accesses private) provided the other
+                      operation's sync scope is:
+
+                      - ``system``, ``agent``, ``workgroup`` or ``wavefront``
+                        and executed by a thread in the same wavefront.
+
+     ``singlethread`` Only synchronizes with, and participates in modification
+                      and seq_cst total orderings with, other operations (except
+                      image operations) running in the same thread for all
+                      address spaces (for example, in signal handlers).
+     ================ ==========================================================
+
+AMDGPU Intrinsics
+-----------------
+
+The AMDGPU backend implements the following intrinsics.
+
+*This section is WIP.*
+
+.. TODO
+   List AMDGPU intrinsics
+
+Code Object
+===========
+
+The AMDGPU backend generates a standard ELF [ELF]_ relocatable code object that
+can be linked by ``lld`` to produce a standard ELF shared code object which can
+be loaded and executed on an AMDGPU target.
+
+Header
+------
+
+The AMDGPU backend uses the following ELF header:
+
+  .. table:: AMDGPU ELF Header
+     :name: amdgpu-elf-header-table
+
+     ========================== =========================
+     Field                      Value
+     ========================== =========================
+     ``e_ident[EI_CLASS]``      ``ELFCLASS64``
+     ``e_ident[EI_DATA]``       ``ELFDATA2LSB``
+     ``e_ident[EI_OSABI]``      ``ELFOSABI_AMDGPU_HSA``
+     ``e_ident[EI_ABIVERSION]`` ``ELFABIVERSION_AMDGPU_HSA``
+     ``e_type``                 ``ET_REL`` or ``ET_DYN``
+     ``e_machine``              ``EM_AMDGPU``
+     ``e_entry``                0
+     ``e_flags``                0
+     ========================== =========================
+
+..
+
+  .. table:: AMDGPU ELF Header Enumeration Values
+     :name: amdgpu-elf-header-enumeration-values-table
+
+     ============================ =====
+     Name                         Value
+     ============================ =====
+     ``EM_AMDGPU``                224
+     ``ELFOSABI_AMDGPU_HSA``      64
+     ``ELFABIVERSION_AMDGPU_HSA`` 1
+     ============================ =====
+
+``e_ident[EI_CLASS]``
+  The ELF class is always ``ELFCLASS64``. The AMDGPU backend only supports 64 bit
+  applications.
+
+``e_ident[EI_DATA]``
+  All AMDGPU targets use ``ELFDATA2LSB`` for little-endian byte ordering.
+
+``e_ident[EI_OSABI]``
+  The AMD GPU architecture specific OS ABI of ``ELFOSABI_AMDGPU_HSA`` is used to
+  specify that the code object conforms to the AMD HSA runtime ABI [HSA]_.
+
+``e_ident[EI_ABIVERSION]``
+  The AMD GPU architecture specific OS ABI version of
+  ``ELFABIVERSION_AMDGPU_HSA`` is used to specify the version of AMD HSA runtime
+  ABI to which the code object conforms.
+
+``e_type``
+  Can be one of the following values:
+
+
+  ``ET_REL``
+    The type produced by the AMD GPU backend compiler as it is a relocatable
+    code object.
+
+  ``ET_DYN``
+    The type produced by the linker as it is a shared code object.
+
+  The AMD HSA runtime loader requires an ``ET_DYN`` code object.
+
+``e_machine``
+  The value ``EM_AMDGPU`` is used for the machine for all members of the AMD GPU
+  architecture family. The specific member is specified in the
+  ``NT_AMD_AMDGPU_ISA`` entry in the ``.note`` section (see
+  :ref:`amdgpu-note-records`).
+
+``e_entry``
+  The entry point is 0 as the entry points for individual kernels must be
+  selected in order to invoke them through AQL packets.
+
+``e_flags``
+  The value is 0 as no flags are used.
+
+Sections
+--------
+
+An AMDGPU target ELF code object has the standard ELF sections which include:
+
+  .. table:: AMDGPU ELF Sections
+     :name: amdgpu-elf-sections-table
+
+     ================== ================ =================================
+     Name               Type             Attributes
+     ================== ================ =================================
+     ``.bss``           ``SHT_NOBITS``   ``SHF_ALLOC`` + ``SHF_WRITE``
+     ``.data``          ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE``
+     ``.debug_``\ *\**  ``SHT_PROGBITS`` *none*
+     ``.dynamic``       ``SHT_DYNAMIC``  ``SHF_ALLOC``
+     ``.dynstr``        ``SHT_PROGBITS`` ``SHF_ALLOC``
+     ``.dynsym``        ``SHT_PROGBITS`` ``SHF_ALLOC``
+     ``.got``           ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE``
+     ``.hash``          ``SHT_HASH``     ``SHF_ALLOC``
+     ``.note``          ``SHT_NOTE``     *none*
+     ``.rela``\ *name*  ``SHT_RELA``     *none*
+     ``.rela.dyn``      ``SHT_RELA``     *none*
+     ``.rodata``        ``SHT_PROGBITS`` ``SHF_ALLOC``
+     ``.shstrtab``      ``SHT_STRTAB``   *none*
+     ``.strtab``        ``SHT_STRTAB``   *none*
+     ``.symtab``        ``SHT_SYMTAB``   *none*
+     ``.text``          ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_EXECINSTR``
+     ================== ================ =================================
+
+These sections have their standard meanings (see [ELF]_) and are only generated
+if needed.
+
+``.debug_``\ *\**
+  The standard DWARF sections. See :ref:`amdgpu-dwarf` for information on the
+  DWARF produced by the AMDGPU backend.
+
+``.dynamic``, ``.dynstr``, ``.dynsym``, ``.hash``
+  The standard sections used by a dynamic loader.
+
+``.note``
+  See :ref:`amdgpu-note-records` for the note records supported by the AMDGPU
+  backend.
+
+``.rela``\ *name*, ``.rela.dyn``
+  For relocatable code objects, *name* is the name of the section to which the
+  relocation records apply. For example, ``.rela.text`` is the section name for
+  relocation records associated with the ``.text`` section.
+
+  For linked shared code objects, ``.rela.dyn`` contains all the relocation
+  records from each of the relocatable code object's ``.rela``\ *name* sections.
+
+  See :ref:`amdgpu-relocation-records` for the relocation records supported by
+  the AMDGPU backend.
+
+``.text``
+  The executable machine code for the kernels and functions they call. Generated
+  as position independent code. See :ref:`amdgpu-code-conventions` for
+  information on conventions used in the ISA generation.
+
+.. _amdgpu-note-records:
+
+Note Records
+------------
+
+As required by ``ELFCLASS64``, minimal zero byte padding must be generated after
+the ``name`` field to ensure the ``desc`` field is 4 byte aligned. In addition,
+minimal zero byte padding must be generated to ensure the ``desc`` field size is
+a multiple of 4 bytes. The ``sh_addralign`` field of the ``.note`` section must
+be at least 4 to indicate at least 4 byte alignment.
+
+The AMDGPU backend code object uses the following ELF note records in the
+``.note`` section. The *Description* column specifies the layout of the note
+record’s ``desc`` field. All fields are consecutive bytes. Note records with
+variable size strings have a corresponding ``*_size`` field that specifies the
+number of bytes, including the terminating null character, in the string. The
+string(s) come immediately after the preceding fields.
+
+Additional note records can be present.
+
+  .. table:: AMDGPU ELF Note Records
+     :name: amdgpu-elf-note-records-table
+
+     ===== ========================== ==========================================
+     Name  Type                       Description
+     ===== ========================== ==========================================
+     "AMD" ``NT_AMD_AMDGPU_METADATA`` <metadata null terminated string>
+     "AMD" ``NT_AMD_AMDGPU_ISA``      <isa name null terminated string>
+     ===== ========================== ==========================================
+
+..
+
+  .. table:: AMDGPU ELF Note Record Enumeration Values
+     :name: amdgpu-elf-note-record-enumeration-values-table
+
+     ============================= =====
+     Name                          Value
+     ============================= =====
+     *reserved*                    0-9
+     ``NT_AMD_AMDGPU_METADATA``    10
+     ``NT_AMD_AMDGPU_ISA``         11
+     ============================= =====
+
+``NT_AMD_AMDGPU_ISA``
+  Specifies the instruction set architecture used by the machine code contained
+  in the code object.
+
+  This note record is required for code objects containing machine code for
+  processors matching the ``amdgcn`` architecture in table
+  :ref:`amdgpu-processors-table`.
+
+  The null terminated string has the following syntax:
+
+    *architecture*\ ``-``\ *vendor*\ ``-``\ *os*\ ``-``\ *environment*\ ``-``\ *processor*
+
+  where:
+
+    *architecture*
+      The architecture from table :ref:`amdgpu-target-triples-table`.
+
+      This is always ``amdgcn`` when the target triple OS is ``amdhsa`` (see
+      :ref:`amdgpu-target-triples`).
+
+    *vendor*
+      The vendor from table :ref:`amdgpu-target-triples-table`.
+
+      For the AMDGPU backend this is always ``amd``.
+
+    *os*
+      The OS from table :ref:`amdgpu-target-triples-table`.
+
+    *environment*
+      An environment from table :ref:`amdgpu-target-triples-table`, or blank if
+      the environment has no effect on the execution of the code object.
+
+      For the AMDGPU backend this is currently always blank.
+    *processor*
+      The processor from table :ref:`amdgpu-processors-table`.
+
+  For example:
+
+    ``amdgcn-amd-amdhsa--gfx901``
+
+``NT_AMD_AMDGPU_METADATA``
+  Specifies extensible metadata associated with the code object. See
+  :ref:`amdgpu-code-object-metadata` for the syntax of the code object metadata
+  string.
+
+  This note record is required and must contain the minimum information
+  necessary to support the ROCm kernel queries. For example, the segment sizes
+  needed in a dispatch packet. In addition, a high level language runtime may
+  require other information to be included. For example, the AMD OpenCL runtime
+  records kernel argument information.
+
+  .. TODO
+     Is the string null terminated? It probably should not if YAML allows it to
+     contain null characters, otherwise it should be.
+
+.. _amdgpu-code-object-metadata:
+
+Code Object Metadata
+--------------------
+
+The code object metadata is specified by the ``NT_AMD_AMDGPU_METADATA`` note
+record (see :ref:`amdgpu-note-records`).
+
+The metadata is specified as a YAML formatted string (see [YAML]_ and
+:doc:`YamlIO`).
+
+The metadata is represented as a single YAML document comprised of the mapping
+defined in table :ref:`amdgpu-amdhsa-code-object-metadata-mapping-table` and
+referenced tables.
+
+For boolean values, the string values of ``false`` and ``true`` are used for
+false and true respectively.
+
+Additional information can be added to the mappings. To avoid conflicts, any
+non-AMD key names should be prefixed by "*vendor-name*.".
+
+  .. table:: AMDHSA Code Object Metadata Mapping
+     :name: amdgpu-amdhsa-code-object-metadata-mapping-table
+
+     ========== ============== ========= =======================================
+     String Key Value Type     Required? Description
+     ========== ============== ========= =======================================
+     "Version"  sequence of    Required  - The first integer is the major
+                2 integers                 version. Currently 1.
+                                         - The second integer is the minor
+                                           version. Currently 0.
+     "Printf"   sequence of              Each string is encoded information
+                strings                  about a printf function call. The
+                                         encoded information is organized as
+                                         fields separated by colon (':'):
+
+                                         ``ID:N:S[0]:S[1]:...:S[N-1]:FormatString``
+
+                                         where:
+
+                                         ``ID``
+                                           A 32 bit integer as a unique id for
+                                           each printf function call
+
+                                         ``N``
+                                           A 32 bit integer equal to the number
+                                           of arguments of printf function call
+                                           minus 1
+
+                                         ``S[i]`` (where i = 0, 1, ... , N-1)
+                                           32 bit integers for the size in bytes
+                                           of the i-th FormatString argument of
+                                           the printf function call
+
+                                         ``FormatString``
+                                           The format string passed to the
+                                           printf function call.
+     "Kernels"  sequence of    Required  Sequence of the mappings for each
+                mapping                  kernel in the code object. See
+                                         :ref:`amdgpu-amdhsa-code-object-kernel-metadata-mapping-table`
+                                         for the definition of the mapping.
+     ========== ============== ========= =======================================
+
+..
+
+  .. table:: AMDHSA Code Object Kernel Metadata Mapping
+     :name: amdgpu-amdhsa-code-object-kernel-metadata-mapping-table
+
+     ================= ============== ========= ================================
+     String Key        Value Type     Required? Description
+     ================= ============== ========= ================================
+     "Name"            string         Required  Source name of the kernel.
+     "SymbolName"      string         Required  Name of the kernel
+                                                descriptor ELF symbol.
+     "Language"        string                   Source language of the kernel.
+                                                Values include:
+
+                                                - "OpenCL C"
+                                                - "OpenCL C++"
+                                                - "HCC"
+                                                - "OpenMP"
+
+     "LanguageVersion" sequence of              - The first integer is the major
+                       2 integers                 version.
+                                                - The second integer is the
+                                                  minor version.
+     "Attrs"           mapping                  Mapping of kernel attributes.
+                                                See
+                                                :ref:`amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table`
+                                                for the mapping definition.
+     "Arguments"       sequence of              Sequence of mappings of the
+                       mapping                  kernel arguments. See
+                                                :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table`
+                                                for the definition of the mapping.
+     "CodeProps"       mapping                  Mapping of properties related to
+                                                the kernel code. See
+                                                :ref:`amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table`
+                                                for the mapping definition.
+     "DebugProps"      mapping                  Mapping of properties related to
+                                                the kernel debugging. See
+                                                :ref:`amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table`
+                                                for the mapping definition.
+     ================= ============== ========= ================================
+
+..
+
+  .. table:: AMDHSA Code Object Kernel Attribute Metadata Mapping
+     :name: amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table
+
+     =================== ============== ========= ==============================
+     String Key          Value Type     Required? Description
+     =================== ============== ========= ==============================
+     "ReqdWorkGroupSize" sequence of              The dispatch work-group size
+                         3 integers               X, Y, Z must correspond to the
+                                                  specified values.
+
+                                                  Corresponds to the OpenCL
+                                                  ``reqd_work_group_size``
+                                                  attribute.
+     "WorkGroupSizeHint" sequence of              The dispatch work-group size
+                         3 integers               X, Y, Z is likely to be the
+                                                  specified values.
+
+                                                  Corresponds to the OpenCL
+                                                  ``work_group_size_hint``
+                                                  attribute.
+     "VecTypeHint"       string                   The name of a scalar or vector
+                                                  type.
+
+                                                  Corresponds to the OpenCL
+                                                  ``vec_type_hint`` attribute.
+     =================== ============== ========= ==============================
+
+..
+
+  .. table:: AMDHSA Code Object Kernel Argument Metadata Mapping
+     :name: amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table
+
+     ================= ============== ========= ================================
+     String Key        Value Type     Required? Description
+     ================= ============== ========= ================================
+     "Name"            string                   Kernel argument name.
+     "TypeName"        string                   Kernel argument type name.
+     "Size"            integer        Required  Kernel argument size in bytes.
+     "Align"           integer        Required  Kernel argument alignment in
+                                                bytes. Must be a power of two.
+     "ValueKind"       string         Required  Kernel argument kind that
+                                                specifies how to set up the
+                                                corresponding argument.
+                                                Values include:
+
+                                                "ByValue"
+                                                  The argument is copied
+                                                  directly into the kernarg.
+
+                                                "GlobalBuffer"
+                                                  A global address space pointer
+                                                  to the buffer data is passed
+                                                  in the kernarg.
+
+                                                "DynamicSharedPointer"
+                                                  A group address space pointer
+                                                  to dynamically allocated LDS
+                                                  is passed in the kernarg.
+
+                                                "Sampler"
+                                                  A global address space
+                                                  pointer to a S# is passed in
+                                                  the kernarg.
+
+                                                "Image"
+                                                  A global address space
+                                                  pointer to a T# is passed in
+                                                  the kernarg.
+
+                                                "Pipe"
+                                                  A global address space pointer
+                                                  to an OpenCL pipe is passed in
+                                                  the kernarg.
+
+                                                "Queue"
+                                                  A global address space pointer
+                                                  to an OpenCL device enqueue
+                                                  queue is passed in the
+                                                  kernarg.
+
+                                                "HiddenGlobalOffsetX"
+                                                  The OpenCL grid dispatch
+                                                  global offset for the X
+                                                  dimension is passed in the
+                                                  kernarg.
+
+                                                "HiddenGlobalOffsetY"
+                                                  The OpenCL grid dispatch
+                                                  global offset for the Y
+                                                  dimension is passed in the
+                                                  kernarg.
+
+                                                "HiddenGlobalOffsetZ"
+                                                  The OpenCL grid dispatch
+                                                  global offset for the Z
+                                                  dimension is passed in the
+                                                  kernarg.
+
+                                                "HiddenNone"
+                                                  An argument that is not used
+                                                  by the kernel. Space needs to
+                                                  be left for it, but it does
+                                                  not need to be set up.
+
+                                                "HiddenPrintfBuffer"
+                                                  A global address space pointer
+                                                  to the runtime printf buffer
+                                                  is passed in kernarg.
+
+                                                "HiddenDefaultQueue"
+                                                  A global address space pointer
+                                                  to the OpenCL device enqueue
+                                                  queue that should be used by
+                                                  the kernel by default is
+                                                  passed in the kernarg.
+
+                                                "HiddenCompletionAction"
+                                                  *TBD*
+
+                                                  .. TODO
+                                                     Add description.
+
+     "ValueType"       string         Required  Kernel argument value type. Only
+                                                present if "ValueKind" is
+                                                "ByValue". For vector data
+                                                types, the value is for the
+                                                element type. Values include:
+
+                                                - "Struct"
+                                                - "I8"
+                                                - "U8"
+                                                - "I16"
+                                                - "U16"
+                                                - "F16"
+                                                - "I32"
+                                                - "U32"
+                                                - "F32"
+                                                - "I64"
+                                                - "U64"
+                                                - "F64"
+
+                                                .. TODO
+                                                   How can it be determined if a
+                                                   vector type, and what size
+                                                   vector?
+     "PointeeAlign"    integer                  Alignment in bytes of pointee
+                                                type for pointer type kernel
+                                                argument. Must be a power
+                                                of 2. Only present if
+                                                "ValueKind" is
+                                                "DynamicSharedPointer".
+     "AddrSpaceQual"   string                   Kernel argument address space
+                                                qualifier. Only present if
+                                                "ValueKind" is "GlobalBuffer" or
+                                                "DynamicSharedPointer". Values
+                                                are:
+
+                                                - "Private"
+                                                - "Global"
+                                                - "Constant"
+                                                - "Local"
+                                                - "Generic"
+                                                - "Region"
+
+                                                .. TODO
+                                                   Is GlobalBuffer only Global
+                                                   or Constant? Is
+                                                   DynamicSharedPointer always
+                                                   Local? Can HCC allow Generic?
+                                                   How can Private or Region
+                                                   ever happen?
+     "AccQual"         string                   Kernel argument access
+                                                qualifier. Only present if
+                                                "ValueKind" is "Image" or
+                                                "Pipe". Values
+                                                are:
+
+                                                - "ReadOnly"
+                                                - "WriteOnly"
+                                                - "ReadWrite"
+
+                                                .. TODO
+                                                   Does this apply to
+                                                   GlobalBuffer?
+     "ActualAcc"       string                   The actual memory accesses
+                                                performed by the kernel on the
+                                                kernel argument. Only present if
+                                                "ValueKind" is "GlobalBuffer",
+                                                "Image", or "Pipe". This may be
+                                                more restrictive than indicated
+                                                by "AccQual" to reflect what the
+                                                kernel actually does. If not
+                                                present then the runtime must
+                                                assume what is implied by
+                                                "AccQual" and "IsConst". Values
+                                                are:
+
+                                                - "ReadOnly"
+                                                - "WriteOnly"
+                                                - "ReadWrite"
+
+     "IsConst"         boolean                  Indicates if the kernel argument
+                                                is const qualified. Only present
+                                                if "ValueKind" is
+                                                "GlobalBuffer".
+
+     "IsRestrict"      boolean                  Indicates if the kernel argument
+                                                is restrict qualified. Only
+                                                present if "ValueKind" is
+                                                "GlobalBuffer".
+
+     "IsVolatile"      boolean                  Indicates if the kernel argument
+                                                is volatile qualified. Only
+                                                present if "ValueKind" is
+                                                "GlobalBuffer".
+
+     "IsPipe"          boolean                  Indicates if the kernel argument
+                                                is pipe qualified. Only present
+                                                if "ValueKind" is "Pipe".
+
+                                                .. TODO
+                                                   Can GlobalBuffer be pipe
+                                                   qualified?
+     ================= ============== ========= ================================
+
+..
+
+  .. table:: AMDHSA Code Object Kernel Code Properties Metadata Mapping
+     :name: amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table
+
+     ============================ ============== ========= =====================
+     String Key                   Value Type     Required? Description
+     ============================ ============== ========= =====================
+     "KernargSegmentSize"         integer        Required  The size in bytes of
+                                                           the kernarg segment
+                                                           that holds the values
+                                                           of the arguments to
+                                                           the kernel.
+     "GroupSegmentFixedSize"      integer        Required  The amount of group
+                                                           segment memory
+                                                           required by a
+                                                           work-group in
+                                                           bytes. This does not
+                                                           include any
+                                                           dynamically allocated
+                                                           group segment memory
+                                                           that may be added
+                                                           when the kernel is
+                                                           dispatched.
+     "PrivateSegmentFixedSize"    integer        Required  The amount of fixed
+                                                           private address space
+                                                           memory required for a
+                                                           work-item in
+                                                           bytes. If
+                                                           IsDynamicCallStack
+                                                           is 1 then additional
+                                                           space must be added
+                                                           to this value for the
+                                                           call stack.
+     "KernargSegmentAlign"        integer        Required  The maximum byte
+                                                           alignment of
+                                                           arguments in the
+                                                           kernarg segment. Must
+                                                           be a power of 2.
+     "WavefrontSize"              integer        Required  Wavefront size. Must
+                                                           be a power of 2.
+     "NumSGPRs"                   integer                  Number of scalar
+                                                           registers used by a
+                                                           wavefront for
+                                                           GFX6-GFX9. This
+                                                           includes the special
+                                                           SGPRs for VCC, Flat
+                                                           Scratch (GFX7-GFX9)
+                                                           and XNACK (for
+                                                           GFX8-GFX9). It does
+                                                           not include the 16
+                                                           SGPR added if a trap
+                                                           handler is
+                                                           enabled. It is not
+                                                           rounded up to the
+                                                           allocation
+                                                           granularity.
+     "NumVGPRs"                   integer                  Number of vector
+                                                           registers used by
+                                                           each work-item for
+                                                           GFX6-GFX9.
+     "MaxFlatWorkgroupSize"       integer                  Maximum flat
+                                                           work-group size
+                                                           supported by the
+                                                           kernel in work-items.
+     "IsDynamicCallStack"         boolean                  Indicates if the
+                                                           generated machine
+                                                           code is using a
+                                                           dynamically sized
+                                                           call stack.
+     "IsXNACKEnabled"             boolean                  Indicates if the
+                                                           generated machine
+                                                           code is capable of
+                                                           supporting XNACK.
+     ============================ ============== ========= =====================
+
+..
+
+  .. table:: AMDHSA Code Object Kernel Debug Properties Metadata Mapping
+     :name: amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table
+
+     =================================== ============== ========= ==============
+     String Key                          Value Type     Required? Description
+     =================================== ============== ========= ==============
+     "DebuggerABIVersion"                string
+     "ReservedNumVGPRs"                  integer
+     "ReservedFirstVGPR"                 integer
+     "PrivateSegmentBufferSGPR"          integer
+     "WavefrontPrivateSegmentOffsetSGPR" integer
+     =================================== ============== ========= ==============
+
+.. TODO
+   Plan to remove the debug properties metadata.   
+
+.. _amdgpu-symbols:
+
+Symbols
+-------
+
+Symbols include the following:
+
+  .. table:: AMDGPU ELF Symbols
+     :name: amdgpu-elf-symbols-table
+
+     ===================== ============== ============= ==================
+     Name                  Type           Section       Description
+     ===================== ============== ============= ==================
+     *link-name*           ``STT_OBJECT`` - ``.data``   Global variable
+                                          - ``.rodata``
+                                          - ``.bss``
+     *link-name*\ ``@kd``  ``STT_OBJECT`` - ``.rodata`` Kernel descriptor
+     *link-name*           ``STT_FUNC``   - ``.text``   Kernel entry point
+     ===================== ============== ============= ==================
+
+Global variable
+  Global variables both used and defined by the compilation unit.
+
+  If the symbol is defined in the compilation unit then it is allocated in the
+  appropriate section according to if it has initialized data or is readonly.
+
+  If the symbol is external then its section is ``STN_UNDEF`` and the loader
+  will resolve relocations using the definition provided by another code object
+  or explicitly defined by the runtime.
+
+  All global symbols, whether defined in the compilation unit or external, are
+  accessed by the machine code indirectly through a GOT table entry. This
+  allows them to be preemptable. The GOT table is only supported when the target
+  triple OS is ``amdhsa`` (see :ref:`amdgpu-target-triples`).
+
+  .. TODO
+     Add description of linked shared object symbols. Seems undefined symbols
+     are marked as STT_NOTYPE.
+
+Kernel descriptor
+  Every HSA kernel has an associated kernel descriptor. It is the address of the
+  kernel descriptor that is used in the AQL dispatch packet used to invoke the
+  kernel, not the kernel entry point. The layout of the HSA kernel descriptor is
+  defined in :ref:`amdgpu-amdhsa-kernel-descriptor`.
+
+Kernel entry point
+  Every HSA kernel also has a symbol for its machine code entry point.
+
+.. _amdgpu-relocation-records:
+
+Relocation Records
+------------------
+
+AMDGPU backend generates ``Elf64_Rela`` relocation records. Supported
+relocatable fields are:
+
+``word32``
+  This specifies a 32-bit field occupying 4 bytes with arbitrary byte
+  alignment. These values use the same byte order as other word values in the
+  AMD GPU architecture.
+
+``word64``
+  This specifies a 64-bit field occupying 8 bytes with arbitrary byte
+  alignment. These values use the same byte order as other word values in the
+  AMD GPU architecture.
+
+Following notations are used for specifying relocation calculations:
+
+**A**
+  Represents the addend used to compute the value of the relocatable field.
+
+**G**
+  Represents the offset into the global offset table at which the relocation
+  entry’s symbol will reside during execution.
+
+**GOT**
+  Represents the address of the global offset table.
+
+**P**
+  Represents the place (section offset for ``et_rel`` or address for ``et_dyn``)
+  of the storage unit being relocated (computed using ``r_offset``).
+
+**S**
+  Represents the value of the symbol whose index resides in the relocation
+  entry.
+
+The following relocation types are supported:
+
+  .. table:: AMDGPU ELF Relocation Records
+     :name: amdgpu-elf-relocation-records-table
+
+     ==========================  =====  ==========  ==============================
+     Relocation Type             Value  Field       Calculation
+     ==========================  =====  ==========  ==============================
+     ``R_AMDGPU_NONE``           0      *none*      *none*
+     ``R_AMDGPU_ABS32_LO``       1      ``word32``  (S + A) & 0xFFFFFFFF
+     ``R_AMDGPU_ABS32_HI``       2      ``word32``  (S + A) >> 32
+     ``R_AMDGPU_ABS64``          3      ``word64``  S + A
+     ``R_AMDGPU_REL32``          4      ``word32``  S + A - P
+     ``R_AMDGPU_REL64``          5      ``word64``  S + A - P
+     ``R_AMDGPU_ABS32``          6      ``word32``  S + A
+     ``R_AMDGPU_GOTPCREL``       7      ``word32``  G + GOT + A - P
+     ``R_AMDGPU_GOTPCREL32_LO``  8      ``word32``  (G + GOT + A - P) & 0xFFFFFFFF
+     ``R_AMDGPU_GOTPCREL32_HI``  9      ``word32``  (G + GOT + A - P) >> 32
+     ``R_AMDGPU_REL32_LO``       10     ``word32``  (S + A - P) & 0xFFFFFFFF
+     ``R_AMDGPU_REL32_HI``       11     ``word32``  (S + A - P) >> 32
+     ==========================  =====  ==========  ==============================
+
+.. _amdgpu-dwarf:
+
+DWARF
+-----
+
+Standard DWARF [DWARF]_ Version 2 sections can be generated. These contain
+information that maps the code object executable code and data to the source
+language constructs. It can be used by tools such as debuggers and profilers.
+
+Address Space Mapping
+~~~~~~~~~~~~~~~~~~~~~
+
+The following address space mapping is used:
+
+  .. table:: AMDGPU DWARF Address Space Mapping
+     :name: amdgpu-dwarf-address-space-mapping-table
+
+     =================== =================
+     DWARF Address Space Memory Space
+     =================== =================
+     1                   Private (Scratch)
+     2                   Local (group/LDS)
+     *omitted*           Global
+     *omitted*           Constant
+     *omitted*           Generic (Flat)
+     *not supported*     Region (GDS)
+     =================== =================
+
+See :ref:`amdgpu-address-spaces` for information on the memory space terminology
+used in the table.
+
+An ``address_class`` attribute is generated on pointer type DIEs to specify the
+DWARF address space of the value of the pointer when it is in the *private* or
+*local* address space. Otherwise the attribute is omitted.
+
+An ``XDEREF`` operation is generated in location list expressions for variables
+that are allocated in the *private* and *local* address space. Otherwise no
+``XDEREF`` is generated.
+
+Register Mapping
+~~~~~~~~~~~~~~~~
+
+*This section is WIP.*
+
+.. TODO
+   Define DWARF register enumeration.
+
+   If want to present a wavefront state then should expose vector registers as
+   64 wide (rather than per work-item view that LLVM uses). Either as separate
+   registers, or a 64x4 byte single register. In either case use a new LANE op
+   (akin to XDEREF) to select the current lane usage in a location
+   expression. This would also allow scalar register spilling to vector register
+   lanes to be expressed (currently no debug information is being generated for
+   spilling). If choose a wide single register approach then use LANE in
+   conjunction with PIECE operation to select the dword part of the register for
+   the current lane. If the separate register approach then use LANE to select
+   the register.
+
+Source Text
+~~~~~~~~~~~
+
+*This section is WIP.*
+
+.. TODO
+   DWARF extension to include runtime generated source text.
+
+.. _amdgpu-code-conventions:
+
+Code Conventions
+================
+
+AMDHSA
+------
+
+This section provides code conventions used when the target triple OS is
+``amdhsa`` (see :ref:`amdgpu-target-triples`).
+
+Kernel Dispatch
+~~~~~~~~~~~~~~~
+
+The HSA architected queuing language (AQL) defines a user space memory interface
+that can be used to control the dispatch of kernels, in an agent independent
+way. An agent can have zero or more AQL queues created for it using the ROCm
+runtime, in which AQL packets (all of which are 64 bytes) can be placed. See the
+*HSA Platform System Architecture Specification* [HSA]_ for the AQL queue
+mechanics and packet layouts.
+
+The packet processor of a kernel agent is responsible for detecting and
+dispatching HSA kernels from the AQL queues associated with it. For AMD GPUs the
+packet processor is implemented by the hardware command processor (CP),
+asynchronous dispatch controller (ADC) and shader processor input controller
+(SPI).
+
+The ROCm runtime can be used to allocate an AQL queue object. It uses the kernel
+mode driver to initialize and register the AQL queue with CP.
+
+To dispatch a kernel the following actions are performed. This can occur in the
+CPU host program, or from an HSA kernel executing on a GPU.
+
+1. A pointer to an AQL queue for the kernel agent on which the kernel is to be
+   executed is obtained.
+2. A pointer to the kernel descriptor (see
+   :ref:`amdgpu-amdhsa-kernel-descriptor`) of the kernel to execute is
+   obtained. It must be for a kernel that is contained in a code object that
+   was loaded by the ROCm runtime on the kernel agent with which the AQL queue is
+   associated.
+3. Space is allocated for the kernel arguments using the ROCm runtime allocator
+   for a memory region with the kernarg property for the kernel agent that will
+   execute the kernel. It must be at least 16 byte aligned.
+4. Kernel argument values are assigned to the kernel argument memory
+   allocation. The layout is defined in the *HSA Programmer’s Language Reference*
+   [HSA]_. For AMDGPU the kernel execution directly accesses the kernel argument
+   memory in the same way constant memory is accessed. (Note that the HSA
+   specification allows an implementation to copy the kernel argument contents to
+   another location that is accessed by the kernel.)
+5. An AQL kernel dispatch packet is created on the AQL queue. The ROCm runtime
+   api uses 64 bit atomic operations to reserve space in the AQL queue for the
+   packet. The packet must be set up, and the final write must use an atomic
+   store release to set the packet kind to ensure the packet contents are
+   visible to the kernel agent. AQL defines a doorbell signal mechanism to
+   notify the kernel agent that the AQL queue has been updated. These rules, and
+   the layout of the AQL queue and kernel dispatch packet are defined in the *HSA
+   System Architecture Specification* [HSA]_.
+6. A kernel dispatch packet includes information about the actual dispatch,
+   such as grid and work-group size, together with information from the code
+   object about the kernel, such as segment sizes. The ROCm runtime queries on
+   the kernel symbol can be used to obtain the code object values which are
+   recorded in the :ref:`amdgpu-code-object-metadata`.
+7. CP executes micro-code and is responsible for detecting and setting up the
+   GPU to execute the wavefronts of a kernel dispatch.
+8. CP ensures that when a wavefront starts executing the kernel machine
+   code, the scalar general purpose registers (SGPR) and vector general purpose
+   registers (VGPR) are set up as required by the machine code. The required
+   setup is defined in the :ref:`amdgpu-amdhsa-kernel-descriptor`. The initial
+   register state is defined in
+   :ref:`amdgpu-amdhsa-initial-kernel-execution-state`.
+9. The prolog of the kernel machine code (see
+   :ref:`amdgpu-amdhsa-kernel-prolog`) sets up the machine state as necessary
+   before continuing executing the machine code that corresponds to the kernel.
+10. When the kernel dispatch has completed execution, CP signals the completion
+    signal specified in the kernel dispatch packet if not 0.
+
+.. _amdgpu-amdhsa-memory-spaces:
+
+Memory Spaces
+~~~~~~~~~~~~~
+
+The memory space properties are:
+
+  .. table:: AMDHSA Memory Spaces
+     :name: amdgpu-amdhsa-memory-spaces-table
+
+     ================= =========== ======== ======= ==================
+     Memory Space Name HSA Segment Hardware Address NULL Value
+                       Name        Name     Size
+     ================= =========== ======== ======= ==================
+     Private           private     scratch  32      0x00000000
+     Local             group       LDS      32      0xFFFFFFFF
+     Global            global      global   64      0x0000000000000000
+     Constant          constant    *same as 64      0x0000000000000000
+                                   global*
+     Generic           flat        flat     64      0x0000000000000000
+     Region            N/A         GDS      32      *not implemented
+                                                    for AMDHSA*
+     ================= =========== ======== ======= ==================
+
+The global and constant memory spaces both use global virtual addresses, which
+are the same virtual address space used by the CPU. However, some virtual
+addresses may only be accessible to the CPU, some only accessible by the GPU,
+and some by both.
+
+Using the constant memory space indicates that the data will not change during
+the execution of the kernel. This allows scalar read instructions to be
+used. The vector and scalar L1 caches are invalidated of volatile data before
+each kernel dispatch execution to allow constant memory to change values between
+kernel dispatches.
+
+The local memory space uses the hardware Local Data Store (LDS) which is
+automatically allocated when the hardware creates work-groups of wavefronts, and
+freed when all the wavefronts of a work-group have terminated. The data store
+(DS) instructions can be used to access it.
+
+The private memory space uses the hardware scratch memory support. If the kernel
+uses scratch, then the hardware allocates memory that is accessed using
+wavefront lane dword (4 byte) interleaving. The mapping used from private
+address to physical address is:
+
+  ``wavefront-scratch-base +
+  (private-address * wavefront-size * 4) +
+  (wavefront-lane-id * 4)``
+
+There are different ways that the wavefront scratch base address is determined
+by a wavefront (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). This
+memory can be accessed in an interleaved manner using buffer instruction with
+the scratch buffer descriptor and per wave scratch offset, by the scratch
+instructions, or by flat instructions. If each lane of a wavefront accesses the
+same private address, the interleaving results in adjacent dwords being accessed
+and hence requires fewer cache lines to be fetched. Multi-dword access is not
+supported except by flat and scratch instructions in GFX9.
+
+The generic address space uses the hardware flat address support available in
+GFX7-GFX9. This uses two fixed ranges of virtual addresses (the private and
+local apertures), that are outside the range of addressable global memory, to
+map from a flat address to a private or local address.
+
+FLAT instructions can take a flat address and access global, private (scratch)
+and group (LDS) memory depending on if the address is within one of the
+aperture ranges. Flat access to scratch requires hardware aperture setup and
+setup in the kernel prologue (see :ref:`amdgpu-amdhsa-flat-scratch`). Flat
+access to LDS requires hardware aperture setup and M0 (GFX7-GFX8) register setup
+(see :ref:`amdgpu-amdhsa-m0`).
+
+To convert between a segment address and a flat address the base address of the
+apertures address can be used. For GFX7-GFX8 these are available in the
+:ref:`amdgpu-amdhsa-hsa-aql-queue` the address of which can be obtained with
+Queue Ptr SGPR (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). For
+GFX9 the aperture base addresses are directly available as inline constant
+registers ``SRC_SHARED_BASE/LIMIT`` and ``SRC_PRIVATE_BASE/LIMIT``. In 64 bit
+address mode the aperture sizes are 2^32 bytes and the base is aligned to 2^32
+which makes it easier to convert from flat to segment or segment to flat.
+
+HSA Image and Samplers
+~~~~~~~~~~~~~~~~~~~~~~
+
+Image and sampler handles created by the ROCm runtime are 64 bit addresses of a
+hardware 32 byte V# and 48 byte S# object respectively. In order to support the
+HSA ``query_sampler`` operations two extra dwords are used to store the HSA BRIG
+enumeration values for the queries that are not trivially deducible from the S#
+representation.
+
+HSA Signals
+~~~~~~~~~~~
+
+Signal handles created by the ROCm runtime are 64 bit addresses of a structure
+allocated in memory accessible from both the CPU and GPU. The structure is
+defined by the ROCm runtime and subject to change between releases (see
+[AMD-ROCm-github]_).
+
+.. _amdgpu-amdhsa-hsa-aql-queue:
+
+HSA AQL Queue
+~~~~~~~~~~~~~
+
+The AQL queue structure is defined by the ROCm runtime and subject to change
+between releases (see [AMD-ROCm-github]_). For some processors it contains
+fields needed to implement certain language features such as the flat address
+aperture bases. It also contains fields used by CP such as managing the
+allocation of scratch memory.
+
+.. _amdgpu-amdhsa-kernel-descriptor:
+
+Kernel Descriptor
+~~~~~~~~~~~~~~~~~
+
+A kernel descriptor consists of the information needed by CP to initiate the
+execution of a kernel, including the entry point address of the machine code
+that implements the kernel.
+
+Kernel Descriptor for GFX6-GFX9
++++++++++++++++++++++++++++++++
+
+CP microcode requires the Kernel descriptor to be allocated on 64 byte alignment.
+
+  .. table:: Kernel Descriptor for GFX6-GFX9
+     :name: amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table
+
+     ======= ======= =============================== ===========================
+     Bits    Size    Field Name                      Description
+     ======= ======= =============================== ===========================
+     31:0    4 bytes group_segment_fixed_size        The amount of fixed local
+                                                     address space memory
+                                                     required for a work-group
+                                                     in bytes. This does not
+                                                     include any dynamically
+                                                     allocated local address
+                                                     space memory that may be
+                                                     added when the kernel is
+                                                     dispatched.
+     63:32   4 bytes private_segment_fixed_size      The amount of fixed
+                                                     private address space
+                                                     memory required for a
+                                                     work-item in bytes. If
+                                                     is_dynamic_call_stack is 1
+                                                     then additional space must
+                                                     be added to this value for
+                                                     the call stack.
+     95:64   4 bytes max_flat_workgroup_size         Maximum flat work-group
+                                                     size supported by the
+                                                     kernel in work-items.
+     96      1 bit   is_dynamic_call_stack           Indicates if the generated
+                                                     machine code is using a
+                                                     dynamically sized call
+                                                     stack.
+     97      1 bit   is_xnack_enabled                Indicates if the generated
+                                                     machine code is capable of
+                                                     supporting XNACK.
+     127:98  30 bits                                 Reserved. Must be 0.
+     191:128 8 bytes kernel_code_entry_byte_offset   Byte offset (possibly
+                                                     negative) from base
+                                                     address of kernel
+                                                     descriptor to kernel's
+                                                     entry point instruction
+                                                     which must be 256 byte
+                                                     aligned.
+     383:192 24                                      Reserved. Must be 0.
+             bytes
+     415:384 4 bytes compute_pgm_rsrc1               Compute Shader (CS)
+                                                     program settings used by
+                                                     CP to set up
+                                                     ``COMPUTE_PGM_RSRC1``
+                                                     configuration
+                                                     register. See
+                                                     :ref:`amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table`.
+     447:416 4 bytes compute_pgm_rsrc2               Compute Shader (CS)
+                                                     program settings used by
+                                                     CP to set up
+                                                     ``COMPUTE_PGM_RSRC2``
+                                                     configuration
+                                                     register. See
+                                                     :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table`.
+     448     1 bit   enable_sgpr_private_segment     Enable the setup of the
+                     _buffer                         SGPR user data registers
+                                                     (see
+                                                     :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
+
+                                                     The total number of SGPR
+                                                     user data registers
+                                                     requested must not exceed
+                                                     16 and match value in
+                                                     ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``.
+                                                     Any requests beyond 16
+                                                     will be ignored.
+     449     1 bit   enable_sgpr_dispatch_ptr        *see above*
+     450     1 bit   enable_sgpr_queue_ptr           *see above*
+     451     1 bit   enable_sgpr_kernarg_segment_ptr *see above*
+     452     1 bit   enable_sgpr_dispatch_id         *see above*
+     453     1 bit   enable_sgpr_flat_scratch_init   *see above*
+     454     1 bit   enable_sgpr_private_segment     *see above*
+                     _size
+     455     1 bit   enable_sgpr_grid_workgroup      Not implemented in CP and
+                     _count_X                        should always be 0.
+     456     1 bit   enable_sgpr_grid_workgroup      Not implemented in CP and
+                     _count_Y                        should always be 0.
+     457     1 bit   enable_sgpr_grid_workgroup      Not implemented in CP and
+                     _count_Z                        should always be 0.
+     463:458 6 bits                                  Reserved. Must be 0.
+     511:464 6                                       Reserved. Must be 0.
+             bytes
+     512     **Total size 64 bytes.**
+     ======= ===================================================================
+
+..
+
+  .. table:: compute_pgm_rsrc1 for GFX6-GFX9
+     :name: amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table
+
+     ======= ======= =============================== ===========================================================================
+     Bits    Size    Field Name                      Description
+     ======= ======= =============================== ===========================================================================
+     5:0     6 bits  granulated_workitem_vgpr_count  Number of vector registers
+                                                     used by each work-item,
+                                                     granularity is device
+                                                     specific:
+
+                                                     GFX6-9
+                                                       roundup((max-vgpr + 1)
+                                                       / 4) - 1
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.VGPRS``.
+     9:6     4 bits  granulated_wavefront_sgpr_count Number of scalar registers
+                                                     used by a wavefront,
+                                                     granularity is device
+                                                     specific:
+
+                                                     GFX6-8
+                                                       roundup((max-sgpr + 1)
+                                                       / 8) - 1
+                                                     GFX9
+                                                       roundup((max-sgpr + 1)
+                                                       / 16) - 1
+
+                                                     Includes the special SGPRs
+                                                     for VCC, Flat Scratch (for
+                                                     GFX7 onwards) and XNACK
+                                                     (for GFX8 onwards). It does
+                                                     not include the 16 SGPR
+                                                     added if a trap handler is
+                                                     enabled.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.SGPRS``.
+     11:10   2 bits  priority                        Must be 0.
+
+                                                     Start executing wavefront
+                                                     at the specified priority.
+
+                                                     CP is responsible for
+                                                     filling in
+                                                     ``COMPUTE_PGM_RSRC1.PRIORITY``.
+     13:12   2 bits  float_mode_round_32             Wavefront starts execution
+                                                     with specified rounding
+                                                     mode for single (32
+                                                     bit) floating point
+                                                     precision floating point
+                                                     operations.
+
+                                                     Floating point rounding
+                                                     mode values are defined in
+                                                     :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
+     15:14   2 bits  float_mode_round_16_64          Wavefront starts execution
+                                                     with specified rounding
+                                                     mode for half/double (16
+                                                     and 64 bit) floating point
+                                                     precision floating point
+                                                     operations.
+
+                                                     Floating point rounding
+                                                     mode values are defined in
+                                                     :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
+     17:16   2 bits  float_mode_denorm_32            Wavefront starts execution
+                                                     with specified denorm mode
+                                                     for single (32
+                                                     bit) floating point
+                                                     precision floating point
+                                                     operations.
+
+                                                     Floating point denorm mode
+                                                     values are defined in
+                                                     :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
+     19:18   2 bits  float_mode_denorm_16_64         Wavefront starts execution
+                                                     with specified denorm mode
+                                                     for half/double (16
+                                                     and 64 bit) floating point
+                                                     precision floating point
+                                                     operations.
+
+                                                     Floating point denorm mode
+                                                     values are defined in
+                                                     :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
+     20      1 bit   priv                            Must be 0.
+
+                                                     Start executing wavefront
+                                                     in privilege trap handler
+                                                     mode.
+
+                                                     CP is responsible for
+                                                     filling in
+                                                     ``COMPUTE_PGM_RSRC1.PRIV``.
+     21      1 bit   enable_dx10_clamp               Wavefront starts execution
+                                                     with DX10 clamp mode
+                                                     enabled. Used by the vector
+                                                     ALU to force DX-10 style
+                                                     treatment of NaN's (when
+                                                     set, clamp NaN to zero,
+                                                     otherwise pass NaN
+                                                     through).
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.DX10_CLAMP``.
+     22      1 bit   debug_mode                      Must be 0.
+
+                                                     Start executing wavefront
+                                                     in single step mode.
+
+                                                     CP is responsible for
+                                                     filling in
+                                                     ``COMPUTE_PGM_RSRC1.DEBUG_MODE``.
+     23      1 bit   enable_ieee_mode                Wavefront starts execution
+                                                     with IEEE mode
+                                                     enabled. Floating point
+                                                     opcodes that support
+                                                     exception flag gathering
+                                                     will quiet and propagate
+                                                     signaling-NaN inputs per
+                                                     IEEE 754-2008. Min_dx10 and
+                                                     max_dx10 become IEEE
+                                                     754-2008 compliant due to
+                                                     signaling-NaN propagation
+                                                     and quieting.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC1.IEEE_MODE``.
+     24      1 bit   bulky                           Must be 0.
+
+                                                     Only one work-group allowed
+                                                     to execute on a compute
+                                                     unit.
+
+                                                     CP is responsible for
+                                                     filling in
+                                                     ``COMPUTE_PGM_RSRC1.BULKY``.
+     25      1 bit   cdbg_user                       Must be 0.
+
+                                                     Flag that can be used to
+                                                     control debugging code.
+
+                                                     CP is responsible for
+                                                     filling in
+                                                     ``COMPUTE_PGM_RSRC1.CDBG_USER``.
+     31:26   6 bits                                  Reserved. Must be 0.
+     32      **Total size 4 bytes.**
+     ======= ===================================================================================================================
+
+..
+
+  .. table:: compute_pgm_rsrc2 for GFX6-GFX9
+     :name: amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table
+
+     ======= ======= =============================== ===========================================================================
+     Bits    Size    Field Name                      Description
+     ======= ======= =============================== ===========================================================================
+     0       1 bit   enable_sgpr_private_segment     Enable the setup of the
+                     _wave_offset                    SGPR wave scratch offset
+                                                     system register (see
+                                                     :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
+     5:1     5 bits  user_sgpr_count                 The total number of SGPR
+                                                     user data registers
+                                                     requested. This number must
+                                                     match the number of user
+                                                     data registers enabled.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.USER_SGPR``.
+     6       1 bit   enable_trap_handler             Set to 1 if code contains a
+                                                     TRAP instruction which
+                                                     requires a trap handler to
+                                                     be enabled.
+
+                                                     CP sets
+                                                     ``COMPUTE_PGM_RSRC2.TRAP_PRESENT``
+                                                     if the runtime has
+                                                     installed a trap handler
+                                                     regardless of the setting
+                                                     of this field.
+     7       1 bit   enable_sgpr_workgroup_id_x      Enable the setup of the
+                                                     system SGPR register for
+                                                     the work-group id in the X
+                                                     dimension (see
+                                                     :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.TGID_X_EN``.
+     8       1 bit   enable_sgpr_workgroup_id_y      Enable the setup of the
+                                                     system SGPR register for
+                                                     the work-group id in the Y
+                                                     dimension (see
+                                                     :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.TGID_Y_EN``.
+     9       1 bit   enable_sgpr_workgroup_id_z      Enable the setup of the
+                                                     system SGPR register for
+                                                     the work-group id in the Z
+                                                     dimension (see
+                                                     :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.TGID_Z_EN``.
+     10      1 bit   enable_sgpr_workgroup_info      Enable the setup of the
+                                                     system SGPR register for
+                                                     work-group information (see
+                                                     :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``.
+     12:11   2 bits  enable_vgpr_workitem_id         Enable the setup of the
+                                                     VGPR system registers used
+                                                     for the work-item ID.
+                                                     :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table`
+                                                     defines the values.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``.
+     13      1 bit   enable_exception_address_watch  Must be 0.
+
+                                                     Wavefront starts execution
+                                                     with address watch
+                                                     exceptions enabled which
+                                                     are generated when L1 has
+                                                     witnessed a thread access
+                                                     an *address of
+                                                     interest*.
+
+                                                     CP is responsible for
+                                                     filling in the address
+                                                     watch bit in
+                                                     ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB``
+                                                     according to what the
+                                                     runtime requests.
+     14      1 bit   enable_exception_memory         Must be 0.
+
+                                                     Wavefront starts execution
+                                                     with memory violation
+                                                     exceptions
+                                                     enabled which are generated
+                                                     when a memory violation has
+                                                     occurred for this wave from
+                                                     L1 or LDS
+                                                     (write-to-read-only-memory,
+                                                     mis-aligned atomic, LDS
+                                                     address out of range,
+                                                     illegal address, etc.).
+
+                                                     CP sets the memory
+                                                     violation bit in
+                                                     ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB``
+                                                     according to what the
+                                                     runtime requests.
+     23:15   9 bits  granulated_lds_size             Must be 0.
+
+                                                     CP uses the rounded value
+                                                     from the dispatch packet,
+                                                     not this value, as the
+                                                     dispatch may contain
+                                                     dynamically allocated group
+                                                     segment memory. CP writes
+                                                     directly to
+                                                     ``COMPUTE_PGM_RSRC2.LDS_SIZE``.
+
+                                                     Amount of group segment
+                                                     (LDS) to allocate for each
+                                                     work-group. Granularity is
+                                                     device specific:
+
+                                                     GFX6:
+                                                       roundup(lds-size / (64 * 4))
+                                                     GFX7-GFX9:
+                                                       roundup(lds-size / (128 * 4))
+
+     24      1 bit   enable_exception_ieee_754_fp    Wavefront starts execution
+                     _invalid_operation              with specified exceptions
+                                                     enabled.
+
+                                                     Used by CP to set up
+                                                     ``COMPUTE_PGM_RSRC2.EXCP_EN``
+                                                     (set from bits 0..6).
+
+                                                     IEEE 754 FP Invalid
+                                                     Operation
+     25      1 bit   enable_exception_fp_denormal    FP Denormal one or more
+                     _source                         input operands is a
+                                                     denormal number
+     26      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP Division by
+                     _division_by_zero               Zero
+     27      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP Overflow
+                     _overflow
+     28      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP Underflow
+                     _underflow
+     29      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP Inexact
+                     _inexact
+     30      1 bit   enable_exception_int_divide_by  Integer Division by Zero
+                     _zero                           (rcp_iflag_f32 instruction
+                                                     only)
+     31      1 bit                                   Reserved. Must be 0.
+     32      **Total size 4 bytes.**
+     ======= ===================================================================================================================
+
+..
+
+  .. table:: Floating Point Rounding Mode Enumeration Values
+     :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table
+
+     ===================================== ===== ===============================
+     Enumeration Name                      Value Description
+     ===================================== ===== ===============================
+     AMD_FLOAT_ROUND_MODE_NEAR_EVEN        0     Round Ties To Even
+     AMD_FLOAT_ROUND_MODE_PLUS_INFINITY    1     Round Toward +infinity
+     AMD_FLOAT_ROUND_MODE_MINUS_INFINITY   2     Round Toward -infinity
+     AMD_FLOAT_ROUND_MODE_ZERO             3     Round Toward 0
+     ===================================== ===== ===============================
+
+..
+
+  .. table:: Floating Point Denorm Mode Enumeration Values
+     :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table
+
+     ===================================== ===== ===============================
+     Enumeration Name                      Value Description
+     ===================================== ===== ===============================
+     AMD_FLOAT_DENORM_MODE_FLUSH_SRC_DST   0     Flush Source and Destination
+                                                 Denorms
+     AMD_FLOAT_DENORM_MODE_FLUSH_DST       1     Flush Output Denorms
+     AMD_FLOAT_DENORM_MODE_FLUSH_SRC       2     Flush Source Denorms
+     AMD_FLOAT_DENORM_MODE_FLUSH_NONE      3     No Flush
+     ===================================== ===== ===============================
+
+..
+
+  .. table:: System VGPR Work-Item ID Enumeration Values
+     :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table
+
+     ===================================== ===== ===============================
+     Enumeration Name                      Value Description
+     ===================================== ===== ===============================
+     AMD_SYSTEM_VGPR_WORKITEM_ID_X         0     Set work-item X dimension ID.
+     AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y       1     Set work-item X and Y
+                                                 dimensions ID.
+     AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     2     Set work-item X, Y and Z
+                                                 dimensions ID.
+     AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3     Undefined.
+     ===================================== ===== ===============================
+
+.. _amdgpu-amdhsa-initial-kernel-execution-state:
+
+Initial Kernel Execution State
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This section defines the register state that will be set up by the packet
+processor prior to the start of execution of every wavefront. This is limited by
+the constraints of the hardware controllers of CP/ADC/SPI.
+
+The order of the SGPR registers is defined, but the compiler can specify which
+ones are actually set up in the kernel descriptor using the ``enable_sgpr_*`` bit
+fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used
+for enabled registers are dense starting at SGPR0: the first enabled register is
+SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have
+an SGPR number.
+
+The initial SGPRs comprise up to 16 User SGPRs that are set by CP and apply to
+all waves of the grid. It is possible to specify more than 16 User SGPRs using
+the ``enable_sgpr_*`` bit fields, in which case only the first 16 are actually
+initialized. These are then immediately followed by the System SGPRs that are
+set up by ADC/SPI and can have different values for each wave of the grid
+dispatch.
+
+SGPR register initial state is defined in
+:ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`.
+
+  .. table:: SGPR Register Set Up Order
+     :name: amdgpu-amdhsa-sgpr-register-set-up-order-table
+
+     ========== ========================== ====== ==============================
+     SGPR Order Name                       Number Description
+                (kernel descriptor enable  of
+                field)                     SGPRs
+     ========== ========================== ====== ==============================
+     First      Private Segment Buffer     4      V# that can be used, together
+                (enable_sgpr_private              with Scratch Wave Offset as an
+                _segment_buffer)                  offset, to access the private
+                                                  memory space using a segment
+                                                  address.
+
+                                                  CP uses the value provided by
+                                                  the runtime.
+     then       Dispatch Ptr               2      64 bit address of AQL dispatch
+                (enable_sgpr_dispatch_ptr)        packet for kernel dispatch
+                                                  actually executing.
+     then       Queue Ptr                  2      64 bit address of amd_queue_t
+                (enable_sgpr_queue_ptr)           object for AQL queue on which
+                                                  the dispatch packet was
+                                                  queued.
+     then       Kernarg Segment Ptr        2      64 bit address of Kernarg
+                (enable_sgpr_kernarg              segment. This is directly
+                _segment_ptr)                     copied from the
+                                                  kernarg_address in the kernel
+                                                  dispatch packet.
+
+                                                  Having CP load it once avoids
+                                                  loading it at the beginning of
+                                                  every wavefront.
+     then       Dispatch Id                2      64 bit Dispatch ID of the
+                (enable_sgpr_dispatch_id)         dispatch packet being
+                                                  executed.
+     then       Flat Scratch Init          2      This is 2 SGPRs:
+                (enable_sgpr_flat_scratch
+                _init)                            GFX6
+                                                    Not supported.
+                                                  GFX7-GFX8
+                                                    The first SGPR is a 32 bit
+                                                    byte offset from
+                                                    ``SH_HIDDEN_PRIVATE_BASE_VMID``
+                                                    to per SPI base of memory
+                                                    for scratch for the queue
+                                                    executing the kernel
+                                                    dispatch. CP obtains this
+                                                    from the runtime.
+
+                                                    This is the same offset used
+                                                    in computing the Scratch
+                                                    Segment Buffer base
+                                                    address. The value of
+                                                    Scratch Wave Offset must be
+                                                    added by the kernel machine
+                                                    code and moved to SGPRn-4
+                                                    for use as the FLAT SCRATCH
+                                                    BASE in flat memory
+                                                    instructions.
+
+                                                    The second SGPR is 32 bit
+                                                    byte size of a single
+                                                    work-item’s scratch memory
+                                                    usage. This is directly
+                                                    loaded from the kernel
+                                                    dispatch packet Private
+                                                    Segment Byte Size and
+                                                    rounded up to a multiple of
+                                                    DWORD.
+
+                                                    The kernel code must move to
+                                                    SGPRn-3 for use as the FLAT
+                                                    SCRATCH SIZE in flat memory
+                                                    instructions. Having CP load
+                                                    it once avoids loading it at
+                                                    the beginning of every
+                                                    wavefront.
+                                                  GFX9
+                                                    This is the 64 bit base
+                                                    address of the per SPI
+                                                    scratch backing memory
+                                                    managed by SPI for the queue
+                                                    executing the kernel
+                                                    dispatch. CP obtains this
+                                                    from the runtime (and
+                                                    divides it if there are
+                                                    multiple Shader Arrays each
+                                                    with its own SPI). The value
+                                                    of Scratch Wave Offset must
+                                                    be added by the kernel
+                                                    machine code and moved to
+                                                    SGPRn-4 and SGPRn-3 for use
+                                                    as the FLAT SCRATCH BASE in
+                                                    flat memory instructions.
+     then       Private Segment Size       1      The 32 bit byte size of a
+                (enable_sgpr_private              single work-item’s scratch
+                _segment_size)                    memory allocation. This is the
+                                                  value from the kernel dispatch
+                                                  packet Private Segment Byte
+                                                  Size rounded up by CP to a
+                                                  multiple of DWORD.
+
+                                                  Having CP load it once avoids
+                                                  loading it at the beginning of
+                                                  every wavefront.
+
+                                                  This is not used for
+                                                  GFX7-GFX8 since it is the same
+                                                  value as the second SGPR of
+                                                  Flat Scratch Init. However, it
+                                                  may be needed for GFX9 which
+                                                  changes the meaning of the
+                                                  Flat Scratch Init value.
+     then       Grid Work-Group Count X    1      32 bit count of the number of
+                (enable_sgpr_grid                 work-groups in the X dimension
+                _workgroup_count_X)               for the grid being
+                                                  executed. Computed from the
+                                                  fields in the kernel dispatch
+                                                  packet as ((grid_size.x +
+                                                  workgroup_size.x - 1) /
+                                                  workgroup_size.x).
+     then       Grid Work-Group Count Y    1      32 bit count of the number of
+                (enable_sgpr_grid                 work-groups in the Y dimension
+                _workgroup_count_Y &&             for the grid being
+                less than 16 previous             executed. Computed from the
+                SGPRs)                            fields in the kernel dispatch
+                                                  packet as ((grid_size.y +
+                                                  workgroup_size.y - 1) /
+                                                  workgroup_size.y).
+
+                                                  Only initialized if <16
+                                                  previous SGPRs initialized.
+     then       Grid Work-Group Count Z    1      32 bit count of the number of
+                (enable_sgpr_grid                 work-groups in the Z dimension
+                _workgroup_count_Z &&             for the grid being
+                less than 16 previous             executed. Computed from the
+                SGPRs)                            fields in the kernel dispatch
+                                                  packet as ((grid_size.z +
+                                                  workgroup_size.z - 1) /
+                                                  workgroup_size.z).
+
+                                                  Only initialized if <16
+                                                  previous SGPRs initialized.
+     then       Work-Group Id X            1      32 bit work-group id in X
+                (enable_sgpr_workgroup_id         dimension of grid for
+                _X)                               wavefront.
+     then       Work-Group Id Y            1      32 bit work-group id in Y
+                (enable_sgpr_workgroup_id         dimension of grid for
+                _Y)                               wavefront.
+     then       Work-Group Id Z            1      32 bit work-group id in Z
+                (enable_sgpr_workgroup_id         dimension of grid for
+                _Z)                               wavefront.
+     then       Work-Group Info            1      {first_wave, 14’b0000,
+                (enable_sgpr_workgroup            ordered_append_term[10:0],
+                _info)                            threadgroup_size_in_waves[5:0]}
+     then       Scratch Wave Offset        1      32 bit byte offset from base
+                (enable_sgpr_private              of scratch base of queue
+                _segment_wave_offset)             executing the kernel
+                                                  dispatch. Must be used as an
+                                                  offset with Private
+                                                  segment address when using
+                                                  Scratch Segment Buffer. It
+                                                  must be used to set up FLAT
+                                                  SCRATCH for flat addressing
+                                                  (see
+                                                  :ref:`amdgpu-amdhsa-flat-scratch`).
+     ========== ========================== ====== ==============================
+
+The order of the VGPR registers is defined, but the compiler can specify which
+ones are actually setup in the kernel descriptor using the ``enable_vgpr*`` bit
+fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used
+for enabled registers are dense starting at VGPR0: the first enabled register is
+VGPR0, the next enabled register is VGPR1 etc.; disabled registers do not have a
+VGPR number.
+
+VGPR register initial state is defined in
+:ref:`amdgpu-amdhsa-vgpr-register-set-up-order-table`.
+
+  .. table:: VGPR Register Set Up Order
+     :name: amdgpu-amdhsa-vgpr-register-set-up-order-table
+
+     ========== ========================== ====== ==============================
+     VGPR Order Name                       Number Description
+                (kernel descriptor enable  of
+                field)                     VGPRs
+     ========== ========================== ====== ==============================
+     First      Work-Item Id X             1      32 bit work item id in X
+                (Always initialized)              dimension of work-group for
+                                                  wavefront lane.
+     then       Work-Item Id Y             1      32 bit work item id in Y
+                (enable_vgpr_workitem_id          dimension of work-group for
+                > 0)                              wavefront lane.
+     then       Work-Item Id Z             1      32 bit work item id in Z
+                (enable_vgpr_workitem_id          dimension of work-group for
+                > 1)                              wavefront lane.
+     ========== ========================== ====== ==============================
+
+The setting of registers is done by GPU CP/ADC/SPI hardware as follows:
+
+1. SGPRs before the Work-Group Ids are set by CP using the 16 User Data
+   registers.
+2. Work-group Id registers X, Y, Z are set by ADC which supports any
+   combination including none.
+3. Scratch Wave Offset is set by SPI on a per wave basis which is why its value
+   cannot be included with the flat scratch init value which is per queue.
+4. The VGPRs are set by SPI which only supports specifying either (X), (X, Y)
+   or (X, Y, Z).
+
+Flat Scratch register pair are adjacent SGPRs so they can be moved as a 64 bit
+value to the hardware required SGPRn-3 and SGPRn-4 respectively.
+
+The global segment can be accessed either using buffer instructions (GFX6 which
+has V# 64 bit address support), flat instructions (GFX7-9), or global
+instructions (GFX9).
+
+If buffer operations are used then the compiler can generate a V# with the
+following properties:
+
+* base address of 0
+* no swizzle
+* ATC: 1 if IOMMU present (such as APU)
+* ptr64: 1
+* MTYPE set to support memory coherence that matches the runtime (such as CC for
+  APU and NC for dGPU).
+
+.. _amdgpu-amdhsa-kernel-prolog:
+
+Kernel Prolog
+~~~~~~~~~~~~~
+
+.. _amdgpu-amdhsa-m0:
+
+M0
+++
+
+GFX6-GFX8
+  The M0 register must be initialized with a value at least the total LDS size
+  if the kernel may access LDS via DS or flat operations. Total LDS size is
+  available in dispatch packet. For M0, it is also possible to use maximum
+  possible value of LDS for given target (0x7FFF for GFX6 and 0xFFFF for
+  GFX7-GFX8).
+GFX9
+  The M0 register is not used for range checking LDS accesses and so does not
+  need to be initialized in the prolog.
+
+.. _amdgpu-amdhsa-flat-scratch:
+
+Flat Scratch
+++++++++++++
+
+If the kernel may use flat operations to access scratch memory, the prolog code
+must set up FLAT_SCRATCH register pair (FLAT_SCRATCH_LO/FLAT_SCRATCH_HI which
+are in SGPRn-4/SGPRn-3). Initialization uses Flat Scratch Init and Scratch Wave
+Offset SGPR registers (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`):
+
+GFX6
+  Flat scratch is not supported.
+
+GFX7-8
+  1. The low word of Flat Scratch Init is 32 bit byte offset from
+     ``SH_HIDDEN_PRIVATE_BASE_VIMID`` to the base of scratch backing memory
+     being managed by SPI for the queue executing the kernel dispatch. This is
+     the same value used in the Scratch Segment Buffer V# base address. The
+     prolog must add the value of Scratch Wave Offset to get the wave's byte
+     scratch backing memory offset from ``SH_HIDDEN_PRIVATE_BASE_VIMID``. Since
+     FLAT_SCRATCH_LO is in units of 256 bytes, the offset must be right shifted
+     by 8 before moving into FLAT_SCRATCH_LO.
+  2. The second word of Flat Scratch Init is 32 bit byte size of a single
+     work-item's scratch memory usage. This is directly loaded from the kernel
+     dispatch packet Private Segment Byte Size and rounded up to a multiple of
+     DWORD. Having CP load it once avoids loading it at the beginning of every
+     wavefront. The prolog must move it to FLAT_SCRATCH_HI for use as FLAT SCRATCH
+     SIZE.
+GFX9
+  The Flat Scratch Init is the 64 bit address of the base of scratch backing
+  memory being managed by SPI for the queue executing the kernel dispatch. The
+  prolog must add the value of Scratch Wave Offset and move the result to the
+  FLAT_SCRATCH pair for use as the flat scratch base in flat memory
+  instructions.
+
+.. _amdgpu-amdhsa-memory-model:
+
+Memory Model
+~~~~~~~~~~~~
+
+This section describes the mapping of LLVM memory model onto AMDGPU machine code
+(see :ref:`memmodel`). *The implementation is WIP.*
+
+.. TODO
+   Update when implementation complete.
+
+   Support more relaxed OpenCL memory model to be controlled by environment
+   component of target triple.
+
+The AMDGPU backend supports the memory synchronization scopes specified in
+:ref:`amdgpu-memory-scopes`.
+
+The code sequences used to implement the memory model are defined in table
+:ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`.
+
+The sequences specify the order of instructions that a single thread must
+execute. The ``s_waitcnt`` and ``buffer_wbinvl1_vol`` are defined with respect
+to other memory instructions executed by the same thread. This allows them to be
+moved earlier or later which can allow them to be combined with other instances
+of the same instruction, or hoisted/sunk out of loops to improve
+performance. Only the instructions related to the memory model are given;
+additional ``s_waitcnt`` instructions are required to ensure registers are
+defined before being used. These may be able to be combined with the memory
+model ``s_waitcnt`` instructions as described above.
+
+The AMDGPU memory model supports both the HSA [HSA]_ memory model, and the
+OpenCL [OpenCL]_ memory model. The HSA memory model uses a single happens-before
+relation for all address spaces (see :ref:`amdgpu-address-spaces`). The OpenCL
+memory model has separate happens-before relations for the global and
+local address spaces, and only a fence specifying both global and local address
+space joins the relationships. Since the LLVM ``memfence`` instruction does not
+allow an address space to be specified the OpenCL fence has to conservatively
+assume both local and global address space was specified. However, optimizations
+can often be done to eliminate the additional ``s_waitcnt`` instructions when
+there are no intervening corresponding ``ds/flat_load/store/atomic`` memory
+instructions. The code sequences in the table indicate what can be omitted for
+the OpenCL memory model. The target triple environment is used to determine if
+the source language is OpenCL (see :ref:`amdgpu-opencl`).
+
+``ds/flat_load/store/atomic`` instructions to local memory are termed LDS
+operations.
+
+``buffer/global/flat_load/store/atomic`` instructions to global memory are
+termed vector memory operations.
+
+For GFX6-GFX9:
+
+* Each agent has multiple compute units (CU).
+* Each CU has multiple SIMDs that execute wavefronts.
+* The wavefronts for a single work-group are executed in the same CU but may be
+  executed by different SIMDs.
+* Each CU has a single LDS memory shared by the wavefronts of the work-groups
+  executing on it.
+* All LDS operations of a CU are performed as wavefront wide operations in a
+  global order and involve no caching. Completion is reported to a wavefront in
+  execution order.
+* The LDS memory has multiple request queues shared by the SIMDs of a
+  CU. Therefore, the LDS operations performed by different waves of a work-group
+  can be reordered relative to each other, which can result in reordering the
+  visibility of vector memory operations with respect to LDS operations of other
+  wavefronts in the same work-group. A ``s_waitcnt lgkmcnt(0)`` is required to
+  ensure synchronization between LDS operations and vector memory operations
+  between waves of a work-group, but not between operations performed by the
+  same wavefront.
+* The vector memory operations are performed as wavefront wide operations and
+  completion is reported to a wavefront in execution order. The exception is
+  that for GFX7-9 ``flat_load/store/atomic`` instructions can report out of
+  vector memory order if they access LDS memory, and out of LDS operation order
+  if they access global memory.
+* The vector memory operations access a vector L1 cache shared by all wavefronts
+  on a CU. Therefore, no special action is required for coherence between
+  wavefronts in the same work-group. A ``buffer_wbinvl1_vol`` is required for
+  coherence between waves executing in different work-groups as they may be
+  executing on different CUs.
+* The scalar memory operations access a scalar L1 cache shared by all wavefronts
+  on a group of CUs. The scalar and vector L1 caches are not coherent. However,
+  scalar operations are used in a restricted way so do not impact the memory
+  model. See :ref:`amdgpu-amdhsa-memory-spaces`.
+* The vector and scalar memory operations use an L2 cache shared by all CUs on
+  the same agent.
+* The L2 cache has independent channels to service disjoint ranges of virtual
+  addresses.
+* Each CU has a separate request queue per channel. Therefore, the vector and
+  scalar memory operations performed by waves executing in different work-groups
+  (which may be executing on different CUs) of an agent can be reordered
+  relative to each other. A ``s_waitcnt vmcnt(0)`` is required to ensure
+  synchronization between vector memory operations of different CUs. It ensures
+  a previous vector memory operation has completed before executing a subsequent
+  vector memory or LDS operation and so can be used to meet the requirements of
+  acquire and release.
+* The L2 cache can be kept coherent with other agents on some targets, or ranges
+  of virtual addresses can be set up to bypass it to ensure system coherence.
+
+Private address space uses ``buffer_load/store`` using the scratch V# (GFX6-8),
+or ``scratch_load/store`` (GFX9). Since only a single thread is accessing the
+memory, atomic memory orderings are not meaningful and all accesses are treated
+as non-atomic.
+
+Constant address space uses ``buffer/global_load`` instructions (or equivalent
+scalar memory instructions). Since the constant address space contents do not
+change during the execution of a kernel dispatch it is not legal to perform
+stores, and atomic memory orderings are not meaningful and all accesses are
+treated as non-atomic.
+
+A memory synchronization scope wider than work-group is not meaningful for the
+group (LDS) address space and is treated as work-group.
+
+The memory model does not support the region address space which is treated as
+non-atomic.
+
+Acquire memory ordering is not meaningful on store atomic instructions and is
+treated as non-atomic.
+
+Release memory ordering is not meaningful on load atomic instructions and is
+treated as non-atomic.
+
+Acquire-release memory ordering is not meaningful on load or store atomic
+instructions and is treated as acquire and release respectively.
+
+AMDGPU backend only uses scalar memory operations to access memory that is
+proven to not change during the execution of the kernel dispatch. This includes
+constant address space and global address space for program scope const
+variables. Therefore the kernel machine code does not have to maintain the
+scalar L1 cache to ensure it is coherent with the vector L1 cache. The scalar
+and vector L1 caches are invalidated between kernel dispatches by CP since
+constant address space data may change between kernel dispatch executions. See
+:ref:`amdgpu-amdhsa-memory-spaces`.
+
+The one exception is if scalar writes are used to spill SGPR registers. In this
+case the AMDGPU backend ensures the memory location used to spill is never
+accessed by vector memory operations at the same time. If scalar writes are used
+then a ``s_dcache_wb`` is inserted before the ``s_endpgm`` and before a function
+return since the locations may be used for vector memory instructions by a
+future wave that uses the same scratch area, or a function call that creates a
+frame at the same address, respectively. There is no need for a ``s_dcache_inv``
+as all scalar writes are write-before-read in the same thread.
+
+Scratch backing memory (which is used for the private address space) is accessed
+with MTYPE NC_NV (non-coherent non-volatile). Since the private address space
+is only accessed by a single thread, and is always write-before-read,
+there is never a need to invalidate these entries from the L1 cache. Hence all
+cache invalidates are done as ``*_vol`` to only invalidate the volatile cache
+lines.
+
+On dGPU the kernarg backing memory is accessed as UC (uncached) to avoid needing
+to invalidate the L2 cache. This also causes it to be treated as non-volatile
+and so is not invalidated by ``*_vol``. On APU it is accessed as CC (cache
+coherent) and so the L2 cache will be coherent with the CPU and other agents.
+
+  .. table:: AMDHSA Memory Model Code Sequences GFX6-GFX9
+     :name: amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table
+
+     ============ ============ ============== ========== =======================
+     LLVM Instr   LLVM Memory  LLVM Memory    AMDGPU     AMDGPU Machine Code
+                  Ordering     Sync Scope     Address
+                                              Space
+     ============ ============ ============== ========== =======================
+     **Non-Atomic**
+     ---------------------------------------------------------------------------
+     load         *none*       *none*         - global   non-volatile
+                                              - generic    1. buffer/global/flat_load
+                                                         volatile
+                                                           1. buffer/global/flat_load
+                                                              glc=1
+     load         *none*       *none*         - local    1. ds_load
+     store        *none*       *none*         - global   1. buffer/global/flat_store
+                                              - generic
+     store        *none*       *none*         - local    1. ds_store
+     **Unordered Atomic**
+     ---------------------------------------------------------------------------
+     load atomic  unordered    *any*          *any*      *Same as non-atomic*.
+     store atomic unordered    *any*          *any*      *Same as non-atomic*.
+     atomicrmw    unordered    *any*          *any*      *Same as monotonic
+                                                         atomic*.
+     **Monotonic Atomic**
+     ---------------------------------------------------------------------------
+     load atomic  monotonic    - singlethread - global   1. buffer/global/flat_load
+                               - wavefront    - generic
+                               - workgroup
+     load atomic  monotonic    - singlethread - local    1. ds_load
+                               - wavefront
+                               - workgroup
+     load atomic  monotonic    - agent        - global   1. buffer/global/flat_load
+                               - system       - generic     glc=1
+     store atomic monotonic    - singlethread - global   1. buffer/global/flat_store
+                               - wavefront    - generic
+                               - workgroup
+                               - agent
+                               - system
+     store atomic monotonic    - singlethread - local    1. ds_store
+                               - wavefront
+                               - workgroup
+     atomicrmw    monotonic    - singlethread - global   1. buffer/global/flat_atomic
+                               - wavefront    - generic
+                               - workgroup
+                               - agent
+                               - system
+     atomicrmw    monotonic    - singlethread - local    1. ds_atomic
+                               - wavefront
+                               - workgroup
+     **Acquire Atomic**
+     ---------------------------------------------------------------------------
+     load atomic  acquire      - singlethread - global   1. buffer/global/ds/flat_load
+                               - wavefront    - local
+                                              - generic
+     load atomic  acquire      - workgroup    - global   1. buffer/global_load
+     load atomic  acquire      - workgroup    - local    1. ds/flat_load
+                                              - generic  2. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures any
+                                                             following global
+                                                             data read is no
+                                                             older than the load
+                                                             atomic value being
+                                                             acquired.
+
+     load atomic  acquire      - agent        - global   1. buffer/global_load
+                               - system                     glc=1
+                                                         2. s_waitcnt vmcnt(0)
+
+                                                           - Must happen before
+                                                             following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures the load
+                                                             has completed
+                                                             before invalidating
+                                                             the cache.
+
+                                                         3. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following
+                                                             loads will not see
+                                                             stale global data.
+
+     load atomic  acquire      - agent        - generic  1. flat_load glc=1
+                               - system                  2. s_waitcnt vmcnt(0) &
+                                                            lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             lgkmcnt(0).
+                                                           - Must happen before
+                                                             following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures the flat_load
+                                                             has completed
+                                                             before invalidating
+                                                             the cache.
+
+                                                         3. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following loads
+                                                             will not see stale
+                                                             global data.
+
+     atomicrmw    acquire      - singlethread - global   1. buffer/global/ds/flat_atomic
+                               - wavefront    - local
+                                              - generic
+     atomicrmw    acquire      - workgroup    - global   1. buffer/global_atomic
+     atomicrmw    acquire      - workgroup    - local    1. ds/flat_atomic
+                                              - generic  2. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures any
+                                                             following global
+                                                             data read is no
+                                                             older than the
+                                                             atomicrmw value
+                                                             being acquired.
+
+     atomicrmw    acquire      - agent        - global   1. buffer/global_atomic
+                               - system                  2. s_waitcnt vmcnt(0)
+
+                                                           - Must happen before
+                                                             following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures the
+                                                             atomicrmw has
+                                                             completed before
+                                                             invalidating the
+                                                             cache.
+
+                                                         3. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following loads
+                                                             will not see stale
+                                                             global data.
+
+     atomicrmw    acquire      - agent        - generic  1. flat_atomic
+                               - system                  2. s_waitcnt vmcnt(0) &
+                                                            lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             lgkmcnt(0).
+                                                           - Must happen before
+                                                             following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures the
+                                                             atomicrmw has
+                                                             completed before
+                                                             invalidating the
+                                                             cache.
+
+                                                         3. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following loads
+                                                             will not see stale
+                                                             global data.
+
+     fence        acquire      - singlethread *none*     *none*
+                               - wavefront
+     fence        acquire      - workgroup    *none*     1. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL and
+                                                             address space is
+                                                             not generic, omit
+                                                             waitcnt. However,
+                                                             since LLVM
+                                                             currently has no
+                                                             address space on
+                                                             the fence need to
+                                                             conservatively
+                                                             always generate. If
+                                                             fence had an
+                                                             address space then
+                                                             set to address
+                                                             space of OpenCL
+                                                             fence flag, or to
+                                                             generic if both
+                                                             local and global
+                                                             flags are
+                                                             specified.
+                                                           - Must happen after
+                                                             any preceding
+                                                             local/generic load
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic).
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures any
+                                                             following global
+                                                             data read is no
+                                                             older than the
+                                                             value read by the
+                                                             fence-paired-atomic.
+
+     fence        acquire      - agent        *none*     1. s_waitcnt vmcnt(0) &
+                               - system                     lgkmcnt(0)
+
+                                                           - If OpenCL and
+                                                             address space is
+                                                             not generic, omit
+                                                             lgkmcnt(0).
+                                                             However, since LLVM
+                                                             currently has no
+                                                             address space on
+                                                             the fence need to
+                                                             conservatively
+                                                             always generate
+                                                             (see comment for
+                                                             previous fence).
+                                                           - Could be split into
+                                                             separate s_waitcnt
+                                                             vmcnt(0) and
+                                                             s_waitcnt
+                                                             lgkmcnt(0) to allow
+                                                             them to be
+                                                             independently moved
+                                                             according to the
+                                                             following rules.
+                                                           - s_waitcnt vmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             global/generic load
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic).
+                                                           - s_waitcnt lgkmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             local/generic load
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic).
+                                                           - Must happen before
+                                                             the following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures that the
+                                                             fence-paired-atomic
+                                                             has completed
+                                                             before invalidating
+                                                             the
+                                                             cache. Therefore
+                                                             any following
+                                                             locations read must
+                                                             be no older than
+                                                             the value read by
+                                                             the
+                                                             fence-paired-atomic.
+
+                                                         2. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following loads
+                                                             will not see stale
+                                                             global data.
+
+     **Release Atomic**
+     ---------------------------------------------------------------------------
+     store atomic release      - singlethread - global   1. buffer/global/ds/flat_store
+                               - wavefront    - local
+                                              - generic
+     store atomic release      - workgroup    - global   1. s_waitcnt lgkmcnt(0)
+                                              - generic
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             store.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to local have
+                                                             completed before
+                                                             performing the
+                                                             store that is being
+                                                             released.
+
+                                                         2. buffer/global/flat_store
+     store atomic release      - workgroup    - local    1. ds_store
+     store atomic release      - agent        - global   1. s_waitcnt vmcnt(0) &
+                               - system       - generic     lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             lgkmcnt(0).
+                                                           - Could be split into
+                                                             separate s_waitcnt
+                                                             vmcnt(0) and
+                                                             s_waitcnt
+                                                             lgkmcnt(0) to allow
+                                                             them to be
+                                                             independently moved
+                                                             according to the
+                                                             following rules.
+                                                           - s_waitcnt vmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             global/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - s_waitcnt lgkmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             store.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to global have
+                                                             completed before
+                                                             performing the
+                                                             store that is being
+                                                             released.
+
+                                                         2. buffer/global/ds/flat_store
+     atomicrmw    release      - singlethread - global   1. buffer/global/ds/flat_atomic
+                               - wavefront    - local
+                                              - generic
+     atomicrmw    release      - workgroup    - global   1. s_waitcnt lgkmcnt(0)
+                                              - generic
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             atomicrmw.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to local have
+                                                             completed before
+                                                             performing the
+                                                             atomicrmw that is
+                                                             being released.
+
+                                                         2. buffer/global/flat_atomic
+     atomicrmw    release      - workgroup    - local    1. ds_atomic
+     atomicrmw    release      - agent        - global   1. s_waitcnt vmcnt(0) &
+                               - system       - generic     lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             lgkmcnt(0).
+                                                           - Could be split into
+                                                             separate s_waitcnt
+                                                             vmcnt(0) and
+                                                             s_waitcnt
+                                                             lgkmcnt(0) to allow
+                                                             them to be
+                                                             independently moved
+                                                             according to the
+                                                             following rules.
+                                                           - s_waitcnt vmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             global/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - s_waitcnt lgkmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             atomicrmw.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to global and local
+                                                             have completed
+                                                             before performing
+                                                             the atomicrmw that
+                                                             is being released.
+
+                                                         2. buffer/global/ds/flat_atomic
+     fence        release      - singlethread *none*     *none*
+                               - wavefront
+     fence        release      - workgroup    *none*     1. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL and
+                                                             address space is
+                                                             not generic, omit
+                                                             waitcnt. However,
+                                                             since LLVM
+                                                             currently has no
+                                                             address space on
+                                                             the fence need to
+                                                             conservatively
+                                                             always generate
+                                                             (see comment for
+                                                             previous fence).
+                                                           - Must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             any following store
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic).
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to local have
+                                                             completed before
+                                                             performing the
+                                                             following
+                                                             fence-paired-atomic.
+
+     fence        release      - agent        *none*     1. s_waitcnt vmcnt(0) &
+                               - system                     lgkmcnt(0)
+
+                                                           - If OpenCL and
+                                                             address space is
+                                                             not generic, omit
+                                                             lgkmcnt(0).
+                                                             However, since LLVM
+                                                             currently has no
+                                                             address space on
+                                                             the fence need to
+                                                             conservatively
+                                                             always generate
+                                                             (see comment for
+                                                             previous fence).
+                                                           - Could be split into
+                                                             separate s_waitcnt
+                                                             vmcnt(0) and
+                                                             s_waitcnt
+                                                             lgkmcnt(0) to allow
+                                                             them to be
+                                                             independently moved
+                                                             according to the
+                                                             following rules.
+                                                           - s_waitcnt vmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             global/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - s_waitcnt lgkmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             any following store
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic).
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to global have
+                                                             completed before
+                                                             performing the
+                                                             following
+                                                             fence-paired-atomic.
+
+     **Acquire-Release Atomic**
+     ---------------------------------------------------------------------------
+     atomicrmw    acq_rel      - singlethread - global   1. buffer/global/ds/flat_atomic
+                               - wavefront    - local
+                                              - generic
+     atomicrmw    acq_rel      - workgroup    - global   1. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             atomicrmw.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to local have
+                                                             completed before
+                                                             performing the
+                                                             atomicrmw that is
+                                                             being released.
+
+                                                         2. buffer/global_atomic
+     atomicrmw    acq_rel      - workgroup    - local    1. ds_atomic
+                                                         2. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures any
+                                                             following global
+                                                             data read is no
+                                                             older than the
+                                                             atomicrmw value
+                                                             being acquired.
+
+     atomicrmw    acq_rel      - workgroup    - generic  1. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             atomicrmw.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to local have
+                                                             completed before
+                                                             performing the
+                                                             atomicrmw that is
+                                                             being released.
+
+                                                         2. flat_atomic
+                                                         3. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             waitcnt.
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures any
+                                                             following global
+                                                             data read is no
+                                                             older than the
+                                                             atomicrmw value
+                                                             being acquired.
+     atomicrmw    acq_rel      - agent        - global   1. s_waitcnt vmcnt(0) &
+                               - system                     lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             lgkmcnt(0).
+                                                           - Could be split into
+                                                             separate s_waitcnt
+                                                             vmcnt(0) and
+                                                             s_waitcnt
+                                                             lgkmcnt(0) to allow
+                                                             them to be
+                                                             independently moved
+                                                             according to the
+                                                             following rules.
+                                                           - s_waitcnt vmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             global/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - s_waitcnt lgkmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             atomicrmw.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to global have
+                                                             completed before
+                                                             performing the
+                                                             atomicrmw that is
+                                                             being released.
+
+                                                         2. buffer/global_atomic
+                                                         3. s_waitcnt vmcnt(0)
+
+                                                           - Must happen before
+                                                             following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures the
+                                                             atomicrmw has
+                                                             completed before
+                                                             invalidating the
+                                                             cache.
+
+                                                         4. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following loads
+                                                             will not see stale
+                                                             global data.
+
+     atomicrmw    acq_rel      - agent        - generic  1. s_waitcnt vmcnt(0) &
+                               - system                     lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             lgkmcnt(0).
+                                                           - Could be split into
+                                                             separate s_waitcnt
+                                                             vmcnt(0) and
+                                                             s_waitcnt
+                                                             lgkmcnt(0) to allow
+                                                             them to be
+                                                             independently moved
+                                                             according to the
+                                                             following rules.
+                                                           - s_waitcnt vmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             global/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - s_waitcnt lgkmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             atomicrmw.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to global have
+                                                             completed before
+                                                             performing the
+                                                             atomicrmw that is
+                                                             being released.
+
+                                                         2. flat_atomic
+                                                         3. s_waitcnt vmcnt(0) &
+                                                            lgkmcnt(0)
+
+                                                           - If OpenCL, omit
+                                                             lgkmcnt(0).
+                                                           - Must happen before
+                                                             following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures the
+                                                             atomicrmw has
+                                                             completed before
+                                                             invalidating the
+                                                             cache.
+
+                                                         4. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following loads
+                                                             will not see stale
+                                                             global data.
+
+     fence        acq_rel      - singlethread *none*     *none*
+                               - wavefront
+     fence        acq_rel      - workgroup    *none*     1. s_waitcnt lgkmcnt(0)
+
+                                                           - If OpenCL and
+                                                             address space is
+                                                             not generic, omit
+                                                             waitcnt. However,
+                                                             since LLVM
+                                                             currently has no
+                                                             address space on
+                                                             the fence, it must
+                                                             conservatively
+                                                             always be generated
+                                                             (see comment for
+                                                             previous fence).
+                                                           - Must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures that all
+                                                             memory operations
+                                                             to local have
+                                                             completed before
+                                                             performing any
+                                                             following global
+                                                             memory operations.
+                                                           - Ensures that the
+                                                             preceding
+                                                             local/generic load
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic)
+                                                             has completed
+                                                             before following
+                                                             global memory
+                                                             operations. This
+                                                             satisfies the
+                                                             requirements of
+                                                             acquire.
+                                                           - Ensures that all
+                                                             previous memory
+                                                             operations have
+                                                             completed before a
+                                                             following
+                                                             local/generic store
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic).
+                                                             This satisfies the
+                                                             requirements of
+                                                             release.
+
+     fence        acq_rel      - agent        *none*     1. s_waitcnt vmcnt(0) &
+                               - system                     lgkmcnt(0)
+
+                                                           - If OpenCL and
+                                                             address space is
+                                                             not generic, omit
+                                                             lgkmcnt(0).
+                                                             However, since LLVM
+                                                             currently has no
+                                                             address space on
+                                                             the fence, it must
+                                                             conservatively
+                                                             always be generated
+                                                             (see comment for
+                                                             previous fence).
+                                                           - Could be split into
+                                                             separate s_waitcnt
+                                                             vmcnt(0) and
+                                                             s_waitcnt
+                                                             lgkmcnt(0) to allow
+                                                             them to be
+                                                             independently moved
+                                                             according to the
+                                                             following rules.
+                                                           - s_waitcnt vmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             global/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - s_waitcnt lgkmcnt(0)
+                                                             must happen after
+                                                             any preceding
+                                                             local/generic
+                                                             load/store/load
+                                                             atomic/store
+                                                             atomic/atomicrmw.
+                                                           - Must happen before
+                                                             the following
+                                                             buffer_wbinvl1_vol.
+                                                           - Ensures that the
+                                                             preceding
+                                                             global/local/generic
+                                                             load
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic)
+                                                             has completed
+                                                             before invalidating
+                                                             the cache. This
+                                                             satisfies the
+                                                             requirements of
+                                                             acquire.
+                                                           - Ensures that all
+                                                             previous memory
+                                                             operations have
+                                                             completed before a
+                                                             following
+                                                             global/local/generic
+                                                             store
+                                                             atomic/atomicrmw
+                                                             with an equal or
+                                                             wider sync scope
+                                                             and memory ordering
+                                                             stronger than
+                                                             unordered (this is
+                                                             termed the
+                                                             fence-paired-atomic).
+                                                             This satisfies the
+                                                             requirements of
+                                                             release.
+
+                                                         2. buffer_wbinvl1_vol
+
+                                                           - Must happen before
+                                                             any following
+                                                             global/generic
+                                                             load/load
+                                                             atomic/store/store
+                                                             atomic/atomicrmw.
+                                                           - Ensures that
+                                                             following loads
+                                                             will not see stale
+                                                             global data. This
+                                                             satisfies the
+                                                             requirements of
+                                                             acquire.
+
+     **Sequential Consistent Atomic**
+     ---------------------------------------------------------------------------
+     load atomic  seq_cst      - singlethread - global   *Same as corresponding
+                               - wavefront    - local    load atomic acquire*.
+                               - workgroup    - generic
+     load atomic  seq_cst      - agent        - global   1. s_waitcnt vmcnt(0)
+                               - system       - local
+                                              - generic    - Must happen after
+                                                             preceding
+                                                             global/generic load
+                                                             atomic/store
+                                                             atomic/atomicrmw
+                                                             with memory
+                                                             ordering of seq_cst
+                                                             and with equal or
+                                                             wider sync scope.
+                                                             (Note that seq_cst
+                                                             fences have their
+                                                             own s_waitcnt
+                                                             vmcnt(0) and so do
+                                                             not need to be
+                                                             considered.)
+                                                           - Ensures any
+                                                             preceding
+                                                             sequentially
+                                                             consistent global
+                                                             memory instructions
+                                                             have completed
+                                                             before executing
+                                                             this sequentially
+                                                             consistent
+                                                             instruction. This
+                                                             prevents reordering
+                                                             a seq_cst store
+                                                             followed by a
+                                                             seq_cst load. (Note
+                                                             that seq_cst is
+                                                             stronger than
+                                                             acquire/release as
+                                                             the reordering of
+                                                             load acquire
+                                                             followed by a store
+                                                             release is
+                                                             prevented by the
+                                                             waitcnt vmcnt(0) of
+                                                             the release, but
+                                                             there is nothing
+                                                             preventing a store
+                                                             release followed by
+                                                             load acquire from
+                                                             completing out of
+                                                             order.)
+
+                                                         2. *Following
+                                                            instructions same as
+                                                            corresponding load
+                                                            atomic acquire*.
+
+     store atomic seq_cst      - singlethread - global   *Same as corresponding
+                               - wavefront    - local    store atomic release*.
+                               - workgroup    - generic
+     store atomic seq_cst      - agent        - global   *Same as corresponding
+                               - system       - generic  store atomic release*.
+     atomicrmw    seq_cst      - singlethread - global   *Same as corresponding
+                               - wavefront    - local    atomicrmw acq_rel*.
+                               - workgroup    - generic
+     atomicrmw    seq_cst      - agent        - global   *Same as corresponding
+                               - system       - generic  atomicrmw acq_rel*.
+     fence        seq_cst      - singlethread *none*     *Same as corresponding
+                               - wavefront               fence acq_rel*.
+                               - workgroup
+                               - agent
+                               - system
+     ============ ============ ============== ========== =======================
+
+The memory order also adds the single thread optimization constraints defined in
+table
+:ref:`amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table`.
+
+  .. table:: AMDHSA Memory Model Single Thread Optimization Constraints GFX6-GFX9
+     :name: amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table
+
+     ============ ==============================================================
+     LLVM Memory  Optimization Constraints
+     Ordering
+     ============ ==============================================================
+     unordered    *none*
+     monotonic    *none*
+     acquire      - If a load atomic/atomicrmw then no following load/load
+                    atomic/store/store atomic/atomicrmw/fence instruction can
+                    be moved before the acquire.
+                  - If a fence then same as load atomic, plus no preceding
+                    associated fence-paired-atomic can be moved after the fence.
+     release      - If a store atomic/atomicrmw then no preceding load/load
+                    atomic/store/store atomic/atomicrmw/fence instruction can
+                    be moved after the release.
+                  - If a fence then same as store atomic, plus no following
+                    associated fence-paired-atomic can be moved before the
+                    fence.
+     acq_rel      Same constraints as both acquire and release.
+     seq_cst      - If a load atomic then same constraints as acquire, plus no
+                    preceding sequentially consistent load atomic/store
+                    atomic/atomicrmw/fence instruction can be moved after the
+                    seq_cst.
+                  - If a store atomic then the same constraints as release, plus
+                    no following sequentially consistent load atomic/store
+                    atomic/atomicrmw/fence instruction can be moved before the
+                    seq_cst.
+                  - If an atomicrmw/fence then same constraints as acq_rel.
+     ============ ==============================================================
 
 Trap Handler ABI
-----------------
-The OS element of the target triple controls the trap handler behavior.
+~~~~~~~~~~~~~~~~
 
-HSA OS
-^^^^^^
-For code objects generated by AMDGPU back-end for the HSA OS, the runtime
-installs a trap handler that supports the s_trap instruction with the following
-usage:
+For code objects generated by AMDGPU backend for HSA [HSA]_ compatible runtimes
+(such as ROCm [AMD-ROCm]_), the runtime installs a trap handler that supports
+the ``s_trap`` instruction with the following usage:
 
- +--------------+-------------+-------------------+----------------------------+
- |Usage         |Code Sequence|Trap Handler Inputs|Description                 |
- +==============+=============+===================+============================+
- |reserved      |s_trap 0x00  |                   |Reserved by hardware.       |
- +--------------+-------------+-------------------+----------------------------+
- |HSA debugtrap |s_trap 0x01  |SGPR0-1: queue_ptr |Reserved for HSA debugtrap  |
- |(arg)         |             |VGPR0: arg         |intrinsic (not implemented).|
- +--------------+-------------+-------------------+----------------------------+
- |llvm.trap     |s_trap 0x02  |SGPR0-1: queue_ptr |Causes dispatch to be       |
- |              |             |                   |terminated and its          |
- |              |             |                   |associated queue put into   |
- |              |             |                   |the error state.            |
- +--------------+-------------+-------------------+----------------------------+
- |llvm.debugtrap| s_trap 0x03 |SGPR0-1: queue_ptr |If debugger not installed   |
- |              |             |                   |handled same as llvm.trap.  |
- +--------------+-------------+-------------------+----------------------------+
- |debugger      |s_trap 0x07  |                   |Reserved for debugger       |
- |breakpoint    |             |                   |breakpoints.                |
- +--------------+-------------+-------------------+----------------------------+
- |debugger      |s_trap 0x08  |                   |Reserved for debugger.      |
- +--------------+-------------+-------------------+----------------------------+
- |debugger      |s_trap 0xfe  |                   |Reserved for debugger.      |
- +--------------+-------------+-------------------+----------------------------+
- |debugger      |s_trap 0xff  |                   |Reserved for debugger.      |
- +--------------+-------------+-------------------+----------------------------+
+  .. table:: AMDGPU Trap Handler for AMDHSA OS
+     :name: amdgpu-trap-handler-for-amdhsa-os-table
 
-Non-HSA OS
-^^^^^^^^^^
-For code objects generated by AMDGPU back-end for non-HSA OS, the runtime does
-not install a trap handler. The llvm.trap and llvm.debugtrap instructions are
-handler as follows:
+     =================== =============== =============== =======================
+     Usage               Code Sequence   Trap Handler    Description
+                                         Inputs
+     =================== =============== =============== =======================
+     reserved            ``s_trap 0x00``                 Reserved by hardware.
+     ``debugtrap(arg)``  ``s_trap 0x01`` ``SGPR0-1``:    Reserved for HSA
+                                           ``queue_ptr`` ``debugtrap``
+                                         ``VGPR0``:      intrinsic (not
+                                           ``arg``       implemented).
+     ``llvm.trap``       ``s_trap 0x02`` ``SGPR0-1``:    Causes dispatch to be
+                                           ``queue_ptr`` terminated and its
+                                                         associated queue put
+                                                         into the error state.
+     ``llvm.debugtrap``  ``s_trap 0x03`` ``SGPR0-1``:    If debugger not
+                                           ``queue_ptr`` installed handled
+                                                         same as ``llvm.trap``.
+     debugger breakpoint ``s_trap 0x07``                 Reserved for debugger
+                                                         breakpoints.
+     debugger            ``s_trap 0x08``                 Reserved for debugger.
+     debugger            ``s_trap 0xfe``                 Reserved for debugger.
+     debugger            ``s_trap 0xff``                 Reserved for debugger.
+     =================== =============== =============== =======================
 
-   =============== ============= ===============================================
-   Usage           Code Sequence Description
-   =============== ============= ===============================================
-   llvm.trap           s_endpgm      Causes wavefront to be terminated.
-   llvm.debugtrap      Nothing       Compiler warning generated that there is no trap handler installed.
-   =============== ============= ===============================================
+Non-AMDHSA
+----------
+
+Trap Handler ABI
+~~~~~~~~~~~~~~~~
+
+For code objects generated by AMDGPU backend for non-amdhsa OS, the runtime does
+not install a trap handler. The ``llvm.trap`` and ``llvm.debugtrap``
+instructions are handled as follows:
+
+  .. table:: AMDGPU Trap Handler for Non-AMDHSA OS
+     :name: amdgpu-trap-handler-for-non-amdhsa-os-table
+
+     =============== =============== ===========================================
+     Usage           Code Sequence   Description
+     =============== =============== ===========================================
+     llvm.trap       s_endpgm        Causes wavefront to be terminated.
+     llvm.debugtrap  *none*          Compiler warning given that there is no
+                                     trap handler installed.
+     =============== =============== ===========================================
+
+Source Languages
+================
+
+.. _amdgpu-opencl:
+
+OpenCL
+------
+
+When generating code for the OpenCL language the target triple environment
+should be ``opencl`` or ``amdgizcl`` (see :ref:`amdgpu-target-triples`).
+
+When the language is OpenCL the following differences occur:
+
+1. The OpenCL memory model is used (see :ref:`amdgpu-amdhsa-memory-model`).
+2. The AMDGPU backend adds additional arguments to the kernel.
+3. Additional metadata is generated (:ref:`amdgpu-code-object-metadata`).
+
+.. TODO
+   Specify what effect this has. Hidden arguments added. Additional metadata
+   generated.
+
+.. _amdgpu-hcc:
+
+HCC
+---
+
+When generating code for the HCC language the target triple environment
+should be ``hcc`` (see :ref:`amdgpu-target-triples`).
+
+When the language is HCC the following differences occur:
+
+1. The HSA memory model is used (see :ref:`amdgpu-amdhsa-memory-model`).
+
+.. TODO
+   Specify what effect this has.
 
 Assembler
-=========
+---------
 
 AMDGPU backend has LLVM-MC based assembler which is currently in development.
-It supports Southern Islands ISA, Sea Islands and Volcanic Islands.
+It supports AMDGCN GFX6-GFX8.
 
-This document describes general syntax for instructions and operands. For more
-information about instructions, their semantics and supported combinations
-of operands, refer to one of Instruction Set Architecture manuals.
+This section describes general syntax for instructions and operands. For more
+information about instructions, their semantics and supported combinations of
+operands, refer to one of the instruction set architecture manuals
+[AMD-Souther-Islands]_ [AMD-Sea-Islands]_ [AMD-Volcanic-Islands]_.
 
-An instruction has the following syntax (register operands are
-normally comma-separated while extra operands are space-separated):
+An instruction has the following syntax (register operands are normally
+comma-separated while extra operands are space-separated):
 
 *<opcode> <register_operand0>, ... <extra_operand0> ...*
 
-
 Operands
---------
+~~~~~~~~
 
 The following syntax for register operands is supported:
 
@@ -140,8 +3472,11 @@ The following extra operands are supported:
   - dst_unused (UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE)
   - abs, neg, sext
 
-DS Instructions Examples
-------------------------
+Instruction Examples
+~~~~~~~~~~~~~~~~~~~~
+
+DS
+~~
 
 .. code-block:: nasm
 
@@ -153,8 +3488,8 @@ DS Instructions Examples
 
 For full list of supported instructions, refer to "LDS/GDS instructions" in ISA Manual.
 
-FLAT Instruction Examples
---------------------------
+FLAT
+++++
 
 .. code-block:: nasm
 
@@ -166,8 +3501,8 @@ FLAT Instruction Examples
 
 For full list of supported instructions, refer to "FLAT instructions" in ISA Manual.
 
-MUBUF Instruction Examples
----------------------------
+MUBUF
++++++
 
 .. code-block:: nasm
 
@@ -179,8 +3514,8 @@ MUBUF Instruction Examples
 
 For full list of supported instructions, refer to "MUBUF Instructions" in ISA Manual.
 
-SMRD/SMEM Instruction Examples
--------------------------------
+SMRD/SMEM
++++++++++
 
 .. code-block:: nasm
 
@@ -192,8 +3527,8 @@ SMRD/SMEM Instruction Examples
 
 For full list of supported instructions, refer to "Scalar Memory Operations" in ISA Manual.
 
-SOP1 Instruction Examples
---------------------------
+SOP1
+++++
 
 .. code-block:: nasm
 
@@ -207,8 +3542,8 @@ SOP1 Instruction Examples
 
 For full list of supported instructions, refer to "SOP1 Instructions" in ISA Manual.
 
-SOP2 Instruction Examples
--------------------------
+SOP2
+++++
 
 .. code-block:: nasm
 
@@ -224,8 +3559,8 @@ SOP2 Instruction Examples
 
 For full list of supported instructions, refer to "SOP2 Instructions" in ISA Manual.
 
-SOPC Instruction Examples
---------------------------
+SOPC
+++++
 
 .. code-block:: nasm
 
@@ -236,8 +3571,8 @@ SOPC Instruction Examples
 
 For full list of supported instructions, refer to "SOPC Instructions" in ISA Manual.
 
-SOPP Instruction Examples
---------------------------
+SOPP
+++++
 
 .. code-block:: nasm
 
@@ -259,8 +3594,8 @@ Unless otherwise mentioned, little verification is performed on the operands
 of SOPP Instructions, so it is up to the programmer to be familiar with the
 range or acceptable values.
 
-Vector ALU Instruction Examples
--------------------------------
+VALU
+++++
 
 For vector ALU instruction opcodes (VOP1, VOP2, VOP3, VOPC, VOP_DPP, VOP_SDWA),
 the assembler will automatically use optimal encoding based on its operands.
@@ -314,19 +3649,20 @@ VOP_SDWA examples:
 For full list of supported instructions, refer to "Vector ALU instructions".
 
 HSA Code Object Directives
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 AMDGPU ABI defines auxiliary data in output code object. In assembly source,
 one can specify them with assembler directives.
 
 .hsa_code_object_version major, minor
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++++++++++++++++++++++++++++++++++++++
 
 *major* and *minor* are integers that specify the version of the HSA code
 object that will be generated by the assembler.
 
 .hsa_code_object_isa [major, minor, stepping, vendor, arch]
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
 
 *major*, *minor*, and *stepping* are all integers that describe the instruction
 set architecture (ISA) version of the assembly program.
@@ -338,13 +3674,13 @@ By default, the assembler will derive the ISA version, *vendor*, and *arch*
 from the value of the -mcpu option that is passed to the assembler.
 
 .amdgpu_hsa_kernel (name)
-^^^^^^^^^^^^^^^^^^^^^^^^^
++++++++++++++++++++++++++
 
 This directives specifies that the symbol with given name is a kernel entry point
 (label) and the object should contain corresponding symbol of type STT_AMDGPU_HSA_KERNEL.
 
 .amd_kernel_code_t
-^^^^^^^^^^^^^^^^^^
+++++++++++++++++++
 
 This directive marks the beginning of a list of key / value pairs that are used
 to specify the amd_kernel_code_t object that will be emitted by the assembler.
@@ -403,3 +3739,25 @@ Here is an example of a minimal amd_kernel_code_t specification:
      s_endpgm
    .Lfunc_end0:
         .size   hello_world, .Lfunc_end0-hello_world
+
+Additional Documentation
+========================
+
+.. [AMD-R6xx] `AMD R6xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf>`__
+.. [AMD-R7xx] `AMD R7xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R700-Family_Instruction_Set_Architecture.pdf>`__
+.. [AMD-Evergreen] `AMD Evergreen shader ISA <http://developer.amd.com/wordpress/media/2012/10/AMD_Evergreen-Family_Instruction_Set_Architecture.pdf>`__
+.. [AMD-Cayman-Trinity] `AMD Cayman/Trinity shader ISA <http://developer.amd.com/wordpress/media/2012/10/AMD_HD_6900_Series_Instruction_Set_Architecture.pdf>`__
+.. [AMD-Souther-Islands] `AMD Southern Islands Series ISA <http://developer.amd.com/wordpress/media/2012/12/AMD_Southern_Islands_Instruction_Set_Architecture.pdf>`__
+.. [AMD-Sea-Islands] `AMD Sea Islands Series ISA <http://developer.amd.com/wordpress/media/2013/07/AMD_Sea_Islands_Instruction_Set_Architecture.pdf>`_
+.. [AMD-Volcanic-Islands] `AMD GCN3 Instruction Set Architecture <http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_GCN3_Instruction_Set_Architecture_rev1.1.pdf>`__
+.. [AMD-OpenCL_Programming-Guide]  `AMD Accelerated Parallel Processing OpenCL Programming Guide <http://developer.amd.com/download/AMD_Accelerated_Parallel_Processing_OpenCL_Programming_Guide.pdf>`_
+.. [AMD-APP-SDK] `AMD Accelerated Parallel Processing APP SDK Documentation <http://developer.amd.com/tools/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/documentation/>`__
+.. [AMD-ROCm] `ROCm: Open Platform for Development, Discovery and Education Around GPU Computing <http://gpuopen.com/compute-product/rocm/>`__
+.. [AMD-ROCm-github] `ROCm github <http://github.com/RadeonOpenCompute>`__
+.. [HSA] `Heterogeneous System Architecture (HSA) Foundation <http://www.hsafoundation.com/>`__
+.. [ELF] `Executable and Linkable Format (ELF) <http://www.sco.com/developers/gabi/>`__
+.. [DWARF] `DWARF Debugging Information Format <http://dwarfstd.org/>`__
+.. [YAML] `YAML Ain’t Markup Language (YAML™) Version 1.2 <http://www.yaml.org/spec/1.2/spec.html>`__
+.. [OpenCL] `The OpenCL Specification Version 2.0 <http://www.khronos.org/registry/cl/specs/opencl-2.0.pdf>`__
+.. [HRF] `Heterogeneous-race-free Memory Models <http://benedictgaster.org/wp-content/uploads/2014/01/asplos269-FINAL.pdf>`__
+.. [AMD-AMDGPU-Compute-Application-Binary-Interface] `AMDGPU Compute Application Binary Interface <https://github.com/RadeonOpenCompute/ROCm-ComputeABI-Doc/blob/master/AMDGPU-ABI.md>`__
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 106fc8456f61..bcdc72283566 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -2642,59 +2642,6 @@ to ensure valid register usage and operand types.
 The AMDGPU backend
 ------------------
 
-The AMDGPU code generator lives in the lib/Target/AMDGPU directory, and is an
-open source native AMD GCN ISA code generator.
-
-Target triples supported
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-The following are the known target triples that are supported by the AMDGPU
-backend.
-
-* **amdgcn--** --- AMD GCN GPUs (AMDGPU.7.0.0+)
-* **amdgcn--amdhsa** --- AMD GCN GPUs (AMDGPU.7.0.0+) with HSA support
-* **r600--** --- AMD GPUs HD2XXX-HD6XXX
-
-Relocations
-^^^^^^^^^^^
-
-Supported relocatable fields are:
-
-* **word32** --- This specifies a 32-bit field occupying 4 bytes with arbitrary
-  byte alignment. These values use the same byte order as other word values in
-  the AMD GPU architecture
-* **word64** --- This specifies a 64-bit field occupying 8 bytes with arbitrary
-  byte alignment. These values use the same byte order as other word values in
-  the AMD GPU architecture
-
-Following notations are used for specifying relocation calculations:
-
-* **A** --- Represents the addend used to compute the value of the relocatable
-  field
-* **G** --- Represents the offset into the global offset table at which the
-  relocation entry’s symbol will reside during execution.
-* **GOT** --- Represents the address of the global offset table.
-* **P** --- Represents the place (section offset or address) of the storage unit
-  being relocated (computed using ``r_offset``)
-* **S** --- Represents the value of the symbol whose index resides in the
-  relocation entry
-
-AMDGPU Backend generates *Elf64_Rela* relocation records with the following
-supported relocation types:
-
-  ==========================  =====  ==========  ==============================
-  Relocation type             Value  Field       Calculation
-  ==========================  =====  ==========  ==============================
-  ``R_AMDGPU_NONE``           0      ``none``    ``none``
-  ``R_AMDGPU_ABS32_LO``       1      ``word32``  (S + A) & 0xFFFFFFFF
-  ``R_AMDGPU_ABS32_HI``       2      ``word32``  (S + A) >> 32
-  ``R_AMDGPU_ABS64``          3      ``word64``  S + A
-  ``R_AMDGPU_REL32``          4      ``word32``  S + A - P
-  ``R_AMDGPU_REL64``          5      ``word64``  S + A - P
-  ``R_AMDGPU_ABS32``          6      ``word32``  S + A
-  ``R_AMDGPU_GOTPCREL``       7      ``word32``  G + GOT + A - P
-  ``R_AMDGPU_GOTPCREL32_LO``  8      ``word32``  (G + GOT + A - P) & 0xFFFFFFFF
-  ``R_AMDGPU_GOTPCREL32_HI``  9      ``word32``  (G + GOT + A - P) >> 32
-  ``R_AMDGPU_REL32_LO``       10     ``word32``  (S + A - P) & 0xFFFFFFFF
-  ``R_AMDGPU_REL32_HI``       11     ``word32``  (S + A - P) >> 32
-  ==========================  =====  ==========  ==============================
+The AMDGPU code generator lives in the ``lib/Target/AMDGPU``
+directory. This code generator is capable of targeting a variety of
+AMD GPU processors. Refer to :doc:`AMDGPUUsage` for more information.
diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst
index 8ce999033b7f..24375fb70d4e 100644
--- a/docs/CompilerWriterInfo.rst
+++ b/docs/CompilerWriterInfo.rst
@@ -72,16 +72,7 @@ Other documents, collections, notes
 AMDGPU
 ------
 
-* `AMD R6xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf>`_
-* `AMD R7xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R700-Family_Instruction_Set_Architecture.pdf>`_
-* `AMD Evergreen shader ISA <http://developer.amd.com/wordpress/media/2012/10/AMD_Evergreen-Family_Instruction_Set_Architecture.pdf>`_
-* `AMD Cayman/Trinity shader ISA <http://developer.amd.com/wordpress/media/2012/10/AMD_HD_6900_Series_Instruction_Set_Architecture.pdf>`_
-* `AMD Southern Islands Series ISA <http://developer.amd.com/wordpress/media/2012/12/AMD_Southern_Islands_Instruction_Set_Architecture.pdf>`_
-* `AMD Sea Islands Series ISA <http://developer.amd.com/wordpress/media/2013/07/AMD_Sea_Islands_Instruction_Set_Architecture.pdf>`_
-* `AMD GCN3 Instruction Set Architecture <http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_GCN3_Instruction_Set_Architecture_rev1.1.pdf>`__
-* `AMD GPU Programming Guide <http://developer.amd.com/download/AMD_Accelerated_Parallel_Processing_OpenCL_Programming_Guide.pdf>`_
-* `AMD Compute Resources <http://developer.amd.com/tools/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/documentation/>`_
-* `AMDGPU Compute Application Binary Interface <https://github.com/RadeonOpenCompute/ROCm-ComputeABI-Doc/blob/master/AMDGPU-ABI.md>`__
+Refer to :doc:`AMDGPUUsage` for additional documentation.
 
 RISC-V
 ------
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 2e339183ef11..e063f6bd35fe 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -6691,15 +6691,14 @@ Semantics:
 The value produced is ``op1`` \* 2\ :sup:`op2` mod 2\ :sup:`n`,
 where ``n`` is the width of the result. If ``op2`` is (statically or
 dynamically) equal to or larger than the number of bits in
-``op1``, the result is undefined. If the arguments are vectors, each
-vector element of ``op1`` is shifted by the corresponding shift amount
-in ``op2``.
+``op1``, this instruction returns a :ref:`poison value <poisonvalues>`.
+If the arguments are vectors, each vector element of ``op1`` is shifted
+by the corresponding shift amount in ``op2``.
 
-If the ``nuw`` keyword is present, then the shift produces a :ref:`poison
-value <poisonvalues>` if it shifts out any non-zero bits. If the
-``nsw`` keyword is present, then the shift produces a :ref:`poison
-value <poisonvalues>` if it shifts out any bits that disagree with the
-resultant sign bit.
+If the ``nuw`` keyword is present, then the shift produces a poison
+value if it shifts out any non-zero bits.
+If the ``nsw`` keyword is present, then the shift produces a poison
+value if it shifts out any bits that disagree with the resultant sign bit.
 
 Example:
 """"""""
@@ -6742,13 +6741,12 @@ Semantics:
 This instruction always performs a logical shift right operation. The
 most significant bits of the result will be filled with zero bits after
 the shift. If ``op2`` is (statically or dynamically) equal to or larger
-than the number of bits in ``op1``, the result is undefined. If the
-arguments are vectors, each vector element of ``op1`` is shifted by the
-corresponding shift amount in ``op2``.
+than the number of bits in ``op1``, this instruction returns a :ref:`poison
+value <poisonvalues>`. If the arguments are vectors, each vector element
+of ``op1`` is shifted by the corresponding shift amount in ``op2``.
 
 If the ``exact`` keyword is present, the result value of the ``lshr`` is
-a :ref:`poison value <poisonvalues>` if any of the bits shifted out are
-non-zero.
+a poison value if any of the bits shifted out are non-zero.
 
 Example:
 """"""""
@@ -6793,13 +6791,12 @@ Semantics:
 This instruction always performs an arithmetic shift right operation,
 The most significant bits of the result will be filled with the sign bit
 of ``op1``. If ``op2`` is (statically or dynamically) equal to or larger
-than the number of bits in ``op1``, the result is undefined. If the
-arguments are vectors, each vector element of ``op1`` is shifted by the
-corresponding shift amount in ``op2``.
+than the number of bits in ``op1``, this instruction returns a :ref:`poison
+value <poisonvalues>`. If the arguments are vectors, each vector element
+of ``op1`` is shifted by the corresponding shift amount in ``op2``.
 
 If the ``exact`` keyword is present, the result value of the ``ashr`` is
-a :ref:`poison value <poisonvalues>` if any of the bits shifted out are
-non-zero.
+a poison value if any of the bits shifted out are non-zero.
 
 Example:
 """"""""
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index bc35e62189a2..95025fb91c72 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -43,6 +43,17 @@ Non-comprehensive list of changes in this release
 * LLVM's ``WeakVH`` has been renamed to ``WeakTrackingVH`` and a new ``WeakVH``
   has been introduced.  The new ``WeakVH`` nulls itself out on deletion, but
   does not track values across RAUW.
+  
+* A new library named ``BinaryFormat`` has been created which holds a collection
+  of code which previously lived in ``Support``.  This includes the
+  ``file_magic`` structure and ``identify_magic`` functions, as well as all the
+  structure and type definitions for DWARF, ELF, COFF, WASM, and MachO file
+  formats.
+  
+* The tool ``llvm-pdbdump`` has been renamed ``llvm-pdbutil`` to better reflect
+  its nature as a general purpose PDB manipulation / diagnostics tool that does
+  more than just dumping contents.
+  
 
 * ... next change ...
 
diff --git a/docs/index.rst b/docs/index.rst
index 220df1566bd5..54b608236530 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -360,10 +360,10 @@ For API clients and LLVM developers.
   Answers some questions about the new Attributes infrastructure.
 
 :doc:`NVPTXUsage`
-   This document describes using the NVPTX back-end to compile GPU kernels.
+   This document describes using the NVPTX backend to compile GPU kernels.
 
 :doc:`AMDGPUUsage`
-   This document describes how to use the AMDGPU back-end.
+   This document describes using the AMDGPU backend to compile GPU kernels.
 
 :doc:`StackMaps`
   LLVM support for mapping instruction addresses to the location of
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index a8b82e1da778..d4c2a8cc5ad9 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -49,7 +49,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Verifier.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/ExecutionEngine/MCJIT.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/DataLayout.h"
@@ -59,7 +59,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Scalar.h"
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 0a1d8faf99b7..22cef23007c3 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -1039,6 +1039,20 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy);
  */
 LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty);
 
+/**
+ * Returns type's subtypes
+ *
+ * @see llvm::Type::subtypes()
+ */
+void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr);
+
+/**
+ * Return the number of types in the derived type.
+ *
+ * @see llvm::Type::getNumContainedTypes()
+ */
+unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp);
+
 /**
  * Create a fixed size array type that refers to a specific type.
  *
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index b72a91a8b137..51830fe139c6 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -19,9 +19,9 @@
 #ifndef LLVM_C_EXECUTIONENGINE_H
 #define LLVM_C_EXECUTIONENGINE_H
 
-#include "llvm-c/Types.h"
 #include "llvm-c/Target.h"
 #include "llvm-c/TargetMachine.h"
+#include "llvm-c/Types.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h
index 735d1fbc78cc..6de184ccab49 100644
--- a/include/llvm-c/Support.h
+++ b/include/llvm-c/Support.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_C_SUPPORT_H
 #define LLVM_C_SUPPORT_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm-c/Types.h"
+#include "llvm/Support/DataTypes.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
index 1d1f61f1a5b4..f4f7f7698c45 100644
--- a/include/llvm-c/TargetMachine.h
+++ b/include/llvm-c/TargetMachine.h
@@ -19,8 +19,8 @@
 #ifndef LLVM_C_TARGETMACHINE_H
 #define LLVM_C_TARGETMACHINE_H
 
-#include "llvm-c/Types.h"
 #include "llvm-c/Target.h"
+#include "llvm-c/Types.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index fe75e25bd8d2..ef9c66d2d700 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -392,6 +392,11 @@ public:
   /// not.
   bool isNullValue() const { return !*this; }
 
+  /// \brief Determine if this is a value of 1.
+  ///
+  /// This checks to see if the value of this APInt is one.
+  bool isOneValue() const { return getActiveBits() == 1; }
+
   /// \brief Determine if this is the largest unsigned value.
   ///
   /// This checks to see if the value of this APInt is the maximum unsigned
diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h
index 2c88c4271b48..68149d9e3bf5 100644
--- a/include/llvm/ADT/GraphTraits.h
+++ b/include/llvm/ADT/GraphTraits.h
@@ -52,7 +52,6 @@ struct GraphTraits {
   //    Return total number of nodes in the graph
   //
 
-
   // If anyone tries to use this class without having an appropriate
   // specialization, make an error.  If you get this error, it's because you
   // need to include the appropriate specialization of GraphTraits<> for your
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 0724a28306a0..9c9bcb81f76b 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -16,16 +16,16 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/iterator.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
-#include <functional>
-#include <vector>
 #include <cstdint>
+#include <functional>
 #include <iterator>
 #include <new>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 9eb15524c0f3..7ce70ebc8ce0 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -19,8 +19,8 @@
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include <cassert>
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 
 namespace llvm {
 
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index a179d29956b1..dc8a9b6e78b2 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -17,9 +17,9 @@
 #define LLVM_ADT_POSTORDERITERATOR_H
 
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/iterator_range.h"
 #include <iterator>
 #include <set>
 #include <utility>
diff --git a/include/llvm/ADT/PriorityWorklist.h b/include/llvm/ADT/PriorityWorklist.h
index 35891e931801..aa531f3337d9 100644
--- a/include/llvm/ADT/PriorityWorklist.h
+++ b/include/llvm/ADT/PriorityWorklist.h
@@ -17,8 +17,8 @@
 #define LLVM_ADT_PRIORITYWORKLIST_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
 #include <algorithm>
 #include <cassert>
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index 734a58f87da2..784a58dc002f 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -232,16 +232,6 @@ template <class T> scc_iterator<T> scc_end(const T &G) {
   return scc_iterator<T>::end(G);
 }
 
-/// \brief Construct the begin iterator for a deduced graph type T's Inverse<T>.
-template <class T> scc_iterator<Inverse<T>> scc_begin(const Inverse<T> &G) {
-  return scc_iterator<Inverse<T>>::begin(G);
-}
-
-/// \brief Construct the end iterator for a deduced graph type T's Inverse<T>.
-template <class T> scc_iterator<Inverse<T>> scc_end(const Inverse<T> &G) {
-  return scc_iterator<Inverse<T>>::end(G);
-}
-
 } // end namespace llvm
 
 #endif // LLVM_ADT_SCCITERATOR_H
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index a0b380b237da..a2ad74b1e04a 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -21,8 +21,8 @@
 #include "llvm/Support/type_traits.h"
 #include <cassert>
 #include <cstddef>
-#include <cstring>
 #include <cstdlib>
+#include <cstring>
 #include <initializer_list>
 #include <iterator>
 #include <utility>
@@ -31,8 +31,12 @@ namespace llvm {
 
 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
 template <class T = void> struct ReverseIterate { static bool value; };
+#if LLVM_ENABLE_REVERSE_ITERATION
+template <class T> bool ReverseIterate<T>::value = true;
+#else
 template <class T> bool ReverseIterate<T>::value = false;
 #endif
+#endif
 
 /// SmallPtrSetImplBase - This is the common code shared among all the
 /// SmallPtrSet<>'s, which is almost everything.  SmallPtrSet has two modes, one
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 35c255002001..bf2a62f43aff 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -388,7 +388,10 @@ public:
   void swap(SmallVectorImpl &RHS);
 
   /// Add the specified range to the end of the SmallVector.
-  template<typename in_iter>
+  template <typename in_iter,
+            typename = typename std::enable_if<std::is_convertible<
+                typename std::iterator_traits<in_iter>::iterator_category,
+                std::input_iterator_tag>::value>::type>
   void append(in_iter in_start, in_iter in_end) {
     size_type NumInputs = std::distance(in_start, in_end);
     // Grow allocated space if needed.
@@ -426,7 +429,11 @@ public:
     std::uninitialized_fill(this->begin(), this->end(), Elt);
   }
 
-  template <typename in_iter> void assign(in_iter in_start, in_iter in_end) {
+  template <typename in_iter,
+            typename = typename std::enable_if<std::is_convertible<
+                typename std::iterator_traits<in_iter>::iterator_category,
+                std::input_iterator_tag>::value>::type>
+  void assign(in_iter in_start, in_iter in_end) {
     clear();
     append(in_start, in_end);
   }
@@ -579,7 +586,10 @@ public:
     return I;
   }
 
-  template<typename ItTy>
+  template <typename ItTy,
+            typename = typename std::enable_if<std::is_convertible<
+                typename std::iterator_traits<ItTy>::iterator_category,
+                std::input_iterator_tag>::value>::type>
   iterator insert(iterator I, ItTy From, ItTy To) {
     // Convert iterator to elt# to avoid invalidating iterator when we reserve()
     size_t InsertElt = I - this->begin();
@@ -860,7 +870,10 @@ public:
     this->assign(Size, Value);
   }
 
-  template<typename ItTy>
+  template <typename ItTy,
+            typename = typename std::enable_if<std::is_convertible<
+                typename std::iterator_traits<ItTy>::iterator_category,
+                std::input_iterator_tag>::value>::type>
   SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
     this->append(S, E);
   }
diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h
index b3a413aa3aa5..c91e0d70f65a 100644
--- a/include/llvm/ADT/SparseMultiSet.h
+++ b/include/llvm/ADT/SparseMultiSet.h
@@ -21,9 +21,9 @@
 #ifndef LLVM_ADT_SPARSEMULTISET_H
 #define LLVM_ADT_SPARSEMULTISET_H
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseSet.h"
-#include "llvm/ADT/STLExtras.h"
 #include <cassert>
 #include <cstdint>
 #include <cstdlib>
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index e22a3f688c40..bbea8619a673 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -15,11 +15,11 @@
 #define LLVM_ADT_STRINGEXTRAS_H
 
 #include "llvm/ADT/StringRef.h"
-#include <iterator>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
+#include <iterator>
 #include <string>
 #include <utility>
 
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 4b25f56432df..f6c93a858db1 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -18,8 +18,8 @@
 #include <cstddef>
 #include <cstring>
 #include <limits>
-#include <type_traits>
 #include <string>
+#include <type_traits>
 #include <utility>
 
 namespace llvm {
diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h
index 3dd679bd9b79..3cbf6198eb60 100644
--- a/include/llvm/ADT/iterator_range.h
+++ b/include/llvm/ADT/iterator_range.h
@@ -19,8 +19,8 @@
 #ifndef LLVM_ADT_ITERATOR_RANGE_H
 #define LLVM_ADT_ITERATOR_RANGE_H
 
-#include <utility>
 #include <iterator>
+#include <utility>
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 1b8b9751faa1..e00ae4f3beec 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -38,11 +38,11 @@
 #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
 #define LLVM_ANALYSIS_ALIASANALYSIS_H
 
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 
 namespace llvm {
 class BasicAAResult;
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index 04c6fd70e07f..58d72afdc1b6 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -21,8 +21,8 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 #include <memory>
 
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 6a876679543d..94d3d4de6c9d 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -26,6 +26,7 @@
 
 namespace llvm {
 class LoopInfo;
+class TargetLibraryInfo;
 class raw_ostream;
 
 /// \brief Analysis providing branch probability information.
@@ -43,8 +44,9 @@ class raw_ostream;
 class BranchProbabilityInfo {
 public:
   BranchProbabilityInfo() {}
-  BranchProbabilityInfo(const Function &F, const LoopInfo &LI) {
-    calculate(F, LI);
+  BranchProbabilityInfo(const Function &F, const LoopInfo &LI,
+                        const TargetLibraryInfo *TLI = nullptr) {
+    calculate(F, LI, TLI);
   }
 
   BranchProbabilityInfo(BranchProbabilityInfo &&Arg)
@@ -116,7 +118,8 @@ public:
     return IsLikely ? LikelyProb : LikelyProb.getCompl();
   }
 
-  void calculate(const Function &F, const LoopInfo &LI);
+  void calculate(const Function &F, const LoopInfo &LI,
+                 const TargetLibraryInfo *TLI = nullptr);
 
   /// Forget analysis results for the given basic block.
   void eraseBlock(const BasicBlock *BB);
@@ -171,7 +174,7 @@ private:
   bool calcColdCallHeuristics(const BasicBlock *BB);
   bool calcPointerHeuristics(const BasicBlock *BB);
   bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI);
-  bool calcZeroHeuristics(const BasicBlock *BB);
+  bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI);
   bool calcFloatingPointHeuristics(const BasicBlock *BB);
   bool calcInvokeHeuristics(const BasicBlock *BB);
 };
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index ff6ca1959153..42034741b8e3 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -31,6 +31,7 @@ class DataLayout;
 class Function;
 class GlobalValue;
 class Instruction;
+class ImmutableCallSite;
 class TargetLibraryInfo;
 class Type;
 
@@ -125,11 +126,12 @@ Constant *ConstantFoldLoadThroughGEPIndices(Constant *C,
 
 /// canConstantFoldCallTo - Return true if its even possible to fold a call to
 /// the specified function.
-bool canConstantFoldCallTo(const Function *F);
+bool canConstantFoldCallTo(ImmutableCallSite CS, const Function *F);
 
 /// ConstantFoldCall - Attempt to constant fold a call to the specified function
 /// with the specified arguments, returning null if unsuccessful.
-Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+Constant *ConstantFoldCall(ImmutableCallSite CS, Function *F,
+                           ArrayRef<Constant *> Operands,
                            const TargetLibraryInfo *TLI = nullptr);
 
 /// \brief Check whether the given call has no side-effects.
diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h
index e5fd8a0007fe..e52c66f361c3 100644
--- a/include/llvm/Analysis/DemandedBits.h
+++ b/include/llvm/Analysis/DemandedBits.h
@@ -22,11 +22,11 @@
 #ifndef LLVM_ANALYSIS_DEMANDED_BITS_H
 #define LLVM_ANALYSIS_DEMANDED_BITS_H
 
-#include "llvm/Pass.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index d91d08a524dc..ce0b7895f253 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_ANALYSIS_INLINECOST_H
 #define LLVM_ANALYSIS_INLINECOST_H
 
-#include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
 #include <cassert>
 #include <climits>
 
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index ca48b5483512..be0f32ef444a 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -41,6 +41,7 @@ template <class T> class ArrayRef;
 class AssumptionCache;
 class DominatorTree;
 class Instruction;
+class ImmutableCallSite;
 class DataLayout;
 class FastMathFlags;
 struct LoopStandardAnalysisResults;
@@ -194,11 +195,12 @@ Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 
 /// Given a function and iterators over arguments, fold the result or return
 /// null.
-Value *SimplifyCall(Value *V, User::op_iterator ArgBegin,
+Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin,
                     User::op_iterator ArgEnd, const SimplifyQuery &Q);
 
 /// Given a function and set of arguments, fold the result or return null.
-Value *SimplifyCall(Value *V, ArrayRef<Value *> Args, const SimplifyQuery &Q);
+Value *SimplifyCall(ImmutableCallSite CS, Value *V, ArrayRef<Value *> Args,
+                    const SimplifyQuery &Q);
 
 /// See if we can compute a simplified version of this instruction. If not,
 /// return null.
diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
index 067d7ebfd1f5..e1d404b1ada2 100644
--- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -24,6 +24,7 @@ namespace llvm {
 class AnalysisUsage;
 class Function;
 class LoopInfo;
+class TargetLibraryInfo;
 
 /// \brief This is an alternative analysis pass to
 /// BranchProbabilityInfoWrapperPass.  The difference is that with this pass the
@@ -55,14 +56,15 @@ class LazyBranchProbabilityInfoPass : public FunctionPass {
   /// analysis without paying for the overhead if BPI doesn't end up being used.
   class LazyBranchProbabilityInfo {
   public:
-    LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI)
-        : Calculated(false), F(F), LI(LI) {}
+    LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI,
+                              const TargetLibraryInfo *TLI)
+        : Calculated(false), F(F), LI(LI), TLI(TLI) {}
 
     /// Retrieve the BPI with the branch probabilities computed.
     BranchProbabilityInfo &getCalculated() {
       if (!Calculated) {
         assert(F && LI && "call setAnalysis");
-        BPI.calculate(*F, *LI);
+        BPI.calculate(*F, *LI, TLI);
         Calculated = true;
       }
       return BPI;
@@ -77,6 +79,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass {
     bool Calculated;
     const Function *F;
     const LoopInfo *LI;
+    const TargetLibraryInfo *TLI;
   };
 
   std::unique_ptr<LazyBranchProbabilityInfo> LBPI;
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index 49e088e533dc..7b178fc7bcc2 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -100,8 +100,11 @@ public:
   /// Inform the analysis cache that we have erased a block.
   void eraseBlock(BasicBlock *BB);
 
-  /// Print the \LazyValueInfoCache.
-  void printCache(Function &F, raw_ostream &OS);
+  /// Print the LazyValueInfo analysis.
+  /// We pass in the DTree that is required for identifying which basic blocks
+  /// we can solve/print for, in the LVIPrinter. The DT is optional
+  /// in LVI, so we need to pass it here as an argument.
+  void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS);
 
   // For old PM pass. Delete once LazyValueInfoWrapperPass is gone.
   void releaseMemory();
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 249fa572c024..6ff4335f1ad5 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -17,8 +17,8 @@
 
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Dominators.h"
 
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index a401887016c9..1dbbf6cc6add 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -15,8 +15,8 @@
 #define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerSumType.h"
 #include "llvm/ADT/PointerEmbeddedInt.h"
+#include "llvm/ADT/PointerSumType.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/BasicBlock.h"
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
index d30eeeaa95b6..b36b2f01dac6 100644
--- a/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -34,6 +34,7 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Module.h"
@@ -45,7 +46,6 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Analysis/MemorySSA.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 5f4d8ecbbfbb..e80412a30564 100644
--- a/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -23,8 +23,8 @@
 #ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H
 #define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H
 
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ObjCARCInstKind.h"
 #include "llvm/Analysis/Passes.h"
diff --git a/include/llvm/Analysis/ObjCARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h
index 3b37ddf78f58..02ff03578238 100644
--- a/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/include/llvm/Analysis/ObjCARCInstKind.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_ANALYSIS_OBJCARCINSTKIND_H
 #define LLVM_ANALYSIS_OBJCARCINSTKIND_H
 
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
 
 namespace llvm {
 namespace objcarc {
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
index b73ad95278a0..51c92121c8f0 100644
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -36,8 +36,8 @@
 #ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
 #define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
 
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 
 namespace llvm {
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index 0e3bdaa11c9a..d75e7833279b 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -13,6 +13,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
@@ -239,6 +240,13 @@ public:
     return Impl->getLibFunc(FDecl, F);
   }
 
+  /// If a callsite does not have the 'nobuiltin' attribute, return if the
+  /// called function is a known library function and set F to that function.
+  bool getLibFunc(ImmutableCallSite CS, LibFunc &F) const {
+    return !CS.isNoBuiltin() && CS.getCalledFunction() &&
+           getLibFunc(*(CS.getCalledFunction()), F);
+  }
+
   /// Tests whether a library function is available.
   bool has(LibFunc F) const {
     return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable;
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 7211508e975a..cd8c2cd24244 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -267,6 +267,19 @@ public:
   /// incurs significant execution cost.
   bool isLoweredToCall(const Function *F) const;
 
+  struct LSRCost {
+    /// TODO: Some of these could be merged. Also, a lexical ordering
+    /// isn't always optimal.
+    unsigned Insns;
+    unsigned NumRegs;
+    unsigned AddRecCost;
+    unsigned NumIVMuls;
+    unsigned NumBaseAdds;
+    unsigned ImmCost;
+    unsigned SetupCost;
+    unsigned ScaleCost;
+  };
+
   /// Parameters that control the generic loop unrolling transformation.
   struct UnrollingPreferences {
     /// The cost threshold for the unrolled loop. Should be relative to the
@@ -385,6 +398,10 @@ public:
                              bool HasBaseReg, int64_t Scale,
                              unsigned AddrSpace = 0) const;
 
+  /// \brief Return true if LSR cost of C1 is lower than C2.
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2) const;
+
   /// \brief Return true if the target supports masked load/store
   /// AVX2 and AVX-512 targets allow masks for consecutive load and store
   bool isLegalMaskedStore(Type *DataType) const;
@@ -705,6 +722,10 @@ public:
   /// if false is returned.
   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
 
+  /// \returns The maximum element size, in bytes, for an element
+  /// unordered-atomic memory intrinsic.
+  unsigned getAtomicMemIntrinsicMaxElementSize() const;
+
   /// \returns A value which is the result of the given memory intrinsic.  New
   /// instructions may be created to extract the result from the given intrinsic
   /// memory operation.  Returns nullptr if the target cannot create a result
@@ -809,6 +830,8 @@ public:
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
                                      unsigned AddrSpace) = 0;
+  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                             TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
@@ -904,6 +927,7 @@ public:
   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                   MemIntrinsicInfo &Info) = 0;
+  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                    Type *ExpectedType) = 0;
   virtual bool areInlineCompatible(const Function *Caller,
@@ -996,6 +1020,10 @@ public:
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                       Scale, AddrSpace);
   }
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2) override {
+    return Impl.isLSRCostLess(C1, C2);
+  }
   bool isLegalMaskedStore(Type *DataType) override {
     return Impl.isLegalMaskedStore(DataType);
   }
@@ -1201,6 +1229,9 @@ public:
                           MemIntrinsicInfo &Info) override {
     return Impl.getTgtMemIntrinsic(Inst, Info);
   }
+  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
+    return Impl.getAtomicMemIntrinsicMaxElementSize();
+  }
   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                            Type *ExpectedType) override {
     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index d73a60eba850..72de7c12eb3e 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -17,13 +17,13 @@
 
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
-#include "llvm/Analysis/VectorUtils.h"
 
 namespace llvm {
 
@@ -229,6 +229,13 @@ public:
     return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
   }
 
+  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
+    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
+                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
+                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+  }
+
   bool isLegalMaskedStore(Type *DataType) { return false; }
 
   bool isLegalMaskedLoad(Type *DataType) { return false; }
@@ -420,6 +427,15 @@ public:
     return false;
   }
 
+  unsigned getAtomicMemIntrinsicMaxElementSize() const {
+    // Note for overrides: You must ensure for all element unordered-atomic
+    // memory intrinsics that all power-of-2 element sizes up to, and
+    // including, the return value of this method have a corresponding
+    // runtime lib call. These runtime lib call definitions can be found
+    // in RuntimeLibcalls.h
+    return 0;
+  }
+
   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                            Type *ExpectedType) {
     return nullptr;
diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h
new file mode 100644
index 000000000000..5171c72b9e67
--- /dev/null
+++ b/include/llvm/BinaryFormat/COFF.h
@@ -0,0 +1,713 @@
+//===-- llvm/BinaryFormat/COFF.h --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions used in Windows COFF Files.
+//
+// Structures and enums defined within this file were created using
+// information from Microsoft's publicly available PE/COFF format document:
+//
+// Microsoft Portable Executable and Common Object File Format Specification
+// Revision 8.1 - February 15, 2008
+//
+// As of 5/2/2010, hosted by Microsoft at:
+// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_COFF_H
+#define LLVM_BINARYFORMAT_COFF_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <cstring>
+
+namespace llvm {
+namespace COFF {
+
+// The maximum number of sections that a COFF object can have (inclusive).
+const int32_t MaxNumberOfSections16 = 65279;
+
+// The PE signature bytes that follow the DOS stub header.
+static const char PEMagic[] = {'P', 'E', '\0', '\0'};
+
+static const char BigObjMagic[] = {
+    '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b',
+    '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8',
+};
+
+static const char ClGlObjMagic[] = {
+    '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d',
+    '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2',
+};
+
+// Sizes in bytes of various things in the COFF format.
+enum {
+  Header16Size = 20,
+  Header32Size = 56,
+  NameSize = 8,
+  Symbol16Size = 18,
+  Symbol32Size = 20,
+  SectionSize = 40,
+  RelocationSize = 10
+};
+
+struct header {
+  uint16_t Machine;
+  int32_t NumberOfSections;
+  uint32_t TimeDateStamp;
+  uint32_t PointerToSymbolTable;
+  uint32_t NumberOfSymbols;
+  uint16_t SizeOfOptionalHeader;
+  uint16_t Characteristics;
+};
+
+struct BigObjHeader {
+  enum : uint16_t { MinBigObjectVersion = 2 };
+
+  uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0).
+  uint16_t Sig2; ///< Must be 0xFFFF.
+  uint16_t Version;
+  uint16_t Machine;
+  uint32_t TimeDateStamp;
+  uint8_t UUID[16];
+  uint32_t unused1;
+  uint32_t unused2;
+  uint32_t unused3;
+  uint32_t unused4;
+  uint32_t NumberOfSections;
+  uint32_t PointerToSymbolTable;
+  uint32_t NumberOfSymbols;
+};
+
+enum MachineTypes {
+  MT_Invalid = 0xffff,
+
+  IMAGE_FILE_MACHINE_UNKNOWN = 0x0,
+  IMAGE_FILE_MACHINE_AM33 = 0x13,
+  IMAGE_FILE_MACHINE_AMD64 = 0x8664,
+  IMAGE_FILE_MACHINE_ARM = 0x1C0,
+  IMAGE_FILE_MACHINE_ARMNT = 0x1C4,
+  IMAGE_FILE_MACHINE_ARM64 = 0xAA64,
+  IMAGE_FILE_MACHINE_EBC = 0xEBC,
+  IMAGE_FILE_MACHINE_I386 = 0x14C,
+  IMAGE_FILE_MACHINE_IA64 = 0x200,
+  IMAGE_FILE_MACHINE_M32R = 0x9041,
+  IMAGE_FILE_MACHINE_MIPS16 = 0x266,
+  IMAGE_FILE_MACHINE_MIPSFPU = 0x366,
+  IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466,
+  IMAGE_FILE_MACHINE_POWERPC = 0x1F0,
+  IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1,
+  IMAGE_FILE_MACHINE_R4000 = 0x166,
+  IMAGE_FILE_MACHINE_SH3 = 0x1A2,
+  IMAGE_FILE_MACHINE_SH3DSP = 0x1A3,
+  IMAGE_FILE_MACHINE_SH4 = 0x1A6,
+  IMAGE_FILE_MACHINE_SH5 = 0x1A8,
+  IMAGE_FILE_MACHINE_THUMB = 0x1C2,
+  IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169
+};
+
+enum Characteristics {
+  C_Invalid = 0,
+
+  /// The file does not contain base relocations and must be loaded at its
+  /// preferred base. If this cannot be done, the loader will error.
+  IMAGE_FILE_RELOCS_STRIPPED = 0x0001,
+  /// The file is valid and can be run.
+  IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002,
+  /// COFF line numbers have been stripped. This is deprecated and should be
+  /// 0.
+  IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004,
+  /// COFF symbol table entries for local symbols have been removed. This is
+  /// deprecated and should be 0.
+  IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008,
+  /// Aggressively trim working set. This is deprecated and must be 0.
+  IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010,
+  /// Image can handle > 2GiB addresses.
+  IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020,
+  /// Little endian: the LSB precedes the MSB in memory. This is deprecated
+  /// and should be 0.
+  IMAGE_FILE_BYTES_REVERSED_LO = 0x0080,
+  /// Machine is based on a 32bit word architecture.
+  IMAGE_FILE_32BIT_MACHINE = 0x0100,
+  /// Debugging info has been removed.
+  IMAGE_FILE_DEBUG_STRIPPED = 0x0200,
+  /// If the image is on removable media, fully load it and copy it to swap.
+  IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400,
+  /// If the image is on network media, fully load it and copy it to swap.
+  IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800,
+  /// The image file is a system file, not a user program.
+  IMAGE_FILE_SYSTEM = 0x1000,
+  /// The image file is a DLL.
+  IMAGE_FILE_DLL = 0x2000,
+  /// This file should only be run on a uniprocessor machine.
+  IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000,
+  /// Big endian: the MSB precedes the LSB in memory. This is deprecated
+  /// and should be 0.
+  IMAGE_FILE_BYTES_REVERSED_HI = 0x8000
+};
+
+enum ResourceTypeID {
+  RID_Cursor = 1,
+  RID_Bitmap = 2,
+  RID_Icon = 3,
+  RID_Menu = 4,
+  RID_Dialog = 5,
+  RID_String = 6,
+  RID_FontDir = 7,
+  RID_Font = 8,
+  RID_Accelerator = 9,
+  RID_RCData = 10,
+  RID_MessageTable = 11,
+  RID_Group_Cursor = 12,
+  RID_Group_Icon = 14,
+  RID_Version = 16,
+  RID_DLGInclude = 17,
+  RID_PlugPlay = 19,
+  RID_VXD = 20,
+  RID_AniCursor = 21,
+  RID_AniIcon = 22,
+  RID_HTML = 23,
+  RID_Manifest = 24,
+};
+
+struct symbol {
+  char Name[NameSize];
+  uint32_t Value;
+  int32_t SectionNumber;
+  uint16_t Type;
+  uint8_t StorageClass;
+  uint8_t NumberOfAuxSymbols;
+};
+
+enum SymbolSectionNumber : int32_t {
+  IMAGE_SYM_DEBUG = -2,
+  IMAGE_SYM_ABSOLUTE = -1,
+  IMAGE_SYM_UNDEFINED = 0
+};
+
+/// Storage class tells where and what the symbol represents
+enum SymbolStorageClass {
+  SSC_Invalid = 0xff,
+
+  IMAGE_SYM_CLASS_END_OF_FUNCTION = -1,  ///< Physical end of function
+  IMAGE_SYM_CLASS_NULL = 0,              ///< No symbol
+  IMAGE_SYM_CLASS_AUTOMATIC = 1,         ///< Stack variable
+  IMAGE_SYM_CLASS_EXTERNAL = 2,          ///< External symbol
+  IMAGE_SYM_CLASS_STATIC = 3,            ///< Static
+  IMAGE_SYM_CLASS_REGISTER = 4,          ///< Register variable
+  IMAGE_SYM_CLASS_EXTERNAL_DEF = 5,      ///< External definition
+  IMAGE_SYM_CLASS_LABEL = 6,             ///< Label
+  IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7,   ///< Undefined label
+  IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8,  ///< Member of structure
+  IMAGE_SYM_CLASS_ARGUMENT = 9,          ///< Function argument
+  IMAGE_SYM_CLASS_STRUCT_TAG = 10,       ///< Structure tag
+  IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11,  ///< Member of union
+  IMAGE_SYM_CLASS_UNION_TAG = 12,        ///< Union tag
+  IMAGE_SYM_CLASS_TYPE_DEFINITION = 13,  ///< Type definition
+  IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static
+  IMAGE_SYM_CLASS_ENUM_TAG = 15,         ///< Enumeration tag
+  IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16,   ///< Member of enumeration
+  IMAGE_SYM_CLASS_REGISTER_PARAM = 17,   ///< Register parameter
+  IMAGE_SYM_CLASS_BIT_FIELD = 18,        ///< Bit field
+  /// ".bb" or ".eb" - beginning or end of block
+  IMAGE_SYM_CLASS_BLOCK = 100,
+  /// ".bf" or ".ef" - beginning or end of function
+  IMAGE_SYM_CLASS_FUNCTION = 101,
+  IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure
+  IMAGE_SYM_CLASS_FILE = 103,          ///< File name
+  /// Section definition (Microsoft tools use STATIC storage class instead)
+  IMAGE_SYM_CLASS_SECTION = 104,
+  IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Weak external
+  /// CLR token symbol
+  IMAGE_SYM_CLASS_CLR_TOKEN = 107
+};
+
+enum SymbolBaseType {
+  IMAGE_SYM_TYPE_NULL = 0,   ///< No type information or unknown base type.
+  IMAGE_SYM_TYPE_VOID = 1,   ///< Used with void pointers and functions.
+  IMAGE_SYM_TYPE_CHAR = 2,   ///< A character (signed byte).
+  IMAGE_SYM_TYPE_SHORT = 3,  ///< A 2-byte signed integer.
+  IMAGE_SYM_TYPE_INT = 4,    ///< A natural integer type on the target.
+  IMAGE_SYM_TYPE_LONG = 5,   ///< A 4-byte signed integer.
+  IMAGE_SYM_TYPE_FLOAT = 6,  ///< A 4-byte floating-point number.
+  IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number.
+  IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure.
+  IMAGE_SYM_TYPE_UNION = 9,  ///< A union.
+  IMAGE_SYM_TYPE_ENUM = 10,  ///< An enumerated type.
+  IMAGE_SYM_TYPE_MOE = 11,   ///< A member of enumeration (a specific value).
+  IMAGE_SYM_TYPE_BYTE = 12,  ///< A byte; unsigned 1-byte integer.
+  IMAGE_SYM_TYPE_WORD = 13,  ///< A word; unsigned 2-byte integer.
+  IMAGE_SYM_TYPE_UINT = 14,  ///< An unsigned integer of natural size.
+  IMAGE_SYM_TYPE_DWORD = 15  ///< An unsigned 4-byte integer.
+};
+
+enum SymbolComplexType {
+  IMAGE_SYM_DTYPE_NULL = 0,     ///< No complex type; simple scalar variable.
+  IMAGE_SYM_DTYPE_POINTER = 1,  ///< A pointer to base type.
+  IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type.
+  IMAGE_SYM_DTYPE_ARRAY = 3,    ///< An array of base type.
+
+  /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT))
+  SCT_COMPLEX_TYPE_SHIFT = 4
+};
+
+enum AuxSymbolType { IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1 };
+
+struct section {
+  char Name[NameSize];
+  uint32_t VirtualSize;
+  uint32_t VirtualAddress;
+  uint32_t SizeOfRawData;
+  uint32_t PointerToRawData;
+  uint32_t PointerToRelocations;
+  uint32_t PointerToLineNumbers;
+  uint16_t NumberOfRelocations;
+  uint16_t NumberOfLineNumbers;
+  uint32_t Characteristics;
+};
+
+enum SectionCharacteristics : uint32_t {
+  SC_Invalid = 0xffffffff,
+
+  IMAGE_SCN_TYPE_NOLOAD = 0x00000002,
+  IMAGE_SCN_TYPE_NO_PAD = 0x00000008,
+  IMAGE_SCN_CNT_CODE = 0x00000020,
+  IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040,
+  IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080,
+  IMAGE_SCN_LNK_OTHER = 0x00000100,
+  IMAGE_SCN_LNK_INFO = 0x00000200,
+  IMAGE_SCN_LNK_REMOVE = 0x00000800,
+  IMAGE_SCN_LNK_COMDAT = 0x00001000,
+  IMAGE_SCN_GPREL = 0x00008000,
+  IMAGE_SCN_MEM_PURGEABLE = 0x00020000,
+  IMAGE_SCN_MEM_16BIT = 0x00020000,
+  IMAGE_SCN_MEM_LOCKED = 0x00040000,
+  IMAGE_SCN_MEM_PRELOAD = 0x00080000,
+  IMAGE_SCN_ALIGN_1BYTES = 0x00100000,
+  IMAGE_SCN_ALIGN_2BYTES = 0x00200000,
+  IMAGE_SCN_ALIGN_4BYTES = 0x00300000,
+  IMAGE_SCN_ALIGN_8BYTES = 0x00400000,
+  IMAGE_SCN_ALIGN_16BYTES = 0x00500000,
+  IMAGE_SCN_ALIGN_32BYTES = 0x00600000,
+  IMAGE_SCN_ALIGN_64BYTES = 0x00700000,
+  IMAGE_SCN_ALIGN_128BYTES = 0x00800000,
+  IMAGE_SCN_ALIGN_256BYTES = 0x00900000,
+  IMAGE_SCN_ALIGN_512BYTES = 0x00A00000,
+  IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000,
+  IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000,
+  IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000,
+  IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000,
+  IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000,
+  IMAGE_SCN_MEM_DISCARDABLE = 0x02000000,
+  IMAGE_SCN_MEM_NOT_CACHED = 0x04000000,
+  IMAGE_SCN_MEM_NOT_PAGED = 0x08000000,
+  IMAGE_SCN_MEM_SHARED = 0x10000000,
+  IMAGE_SCN_MEM_EXECUTE = 0x20000000,
+  IMAGE_SCN_MEM_READ = 0x40000000,
+  IMAGE_SCN_MEM_WRITE = 0x80000000
+};
+
+struct relocation {
+  uint32_t VirtualAddress;
+  uint32_t SymbolTableIndex;
+  uint16_t Type;
+};
+
+enum RelocationTypeI386 {
+  IMAGE_REL_I386_ABSOLUTE = 0x0000,
+  IMAGE_REL_I386_DIR16 = 0x0001,
+  IMAGE_REL_I386_REL16 = 0x0002,
+  IMAGE_REL_I386_DIR32 = 0x0006,
+  IMAGE_REL_I386_DIR32NB = 0x0007,
+  IMAGE_REL_I386_SEG12 = 0x0009,
+  IMAGE_REL_I386_SECTION = 0x000A,
+  IMAGE_REL_I386_SECREL = 0x000B,
+  IMAGE_REL_I386_TOKEN = 0x000C,
+  IMAGE_REL_I386_SECREL7 = 0x000D,
+  IMAGE_REL_I386_REL32 = 0x0014
+};
+
+enum RelocationTypeAMD64 {
+  IMAGE_REL_AMD64_ABSOLUTE = 0x0000,
+  IMAGE_REL_AMD64_ADDR64 = 0x0001,
+  IMAGE_REL_AMD64_ADDR32 = 0x0002,
+  IMAGE_REL_AMD64_ADDR32NB = 0x0003,
+  IMAGE_REL_AMD64_REL32 = 0x0004,
+  IMAGE_REL_AMD64_REL32_1 = 0x0005,
+  IMAGE_REL_AMD64_REL32_2 = 0x0006,
+  IMAGE_REL_AMD64_REL32_3 = 0x0007,
+  IMAGE_REL_AMD64_REL32_4 = 0x0008,
+  IMAGE_REL_AMD64_REL32_5 = 0x0009,
+  IMAGE_REL_AMD64_SECTION = 0x000A,
+  IMAGE_REL_AMD64_SECREL = 0x000B,
+  IMAGE_REL_AMD64_SECREL7 = 0x000C,
+  IMAGE_REL_AMD64_TOKEN = 0x000D,
+  IMAGE_REL_AMD64_SREL32 = 0x000E,
+  IMAGE_REL_AMD64_PAIR = 0x000F,
+  IMAGE_REL_AMD64_SSPAN32 = 0x0010
+};
+
+enum RelocationTypesARM {
+  IMAGE_REL_ARM_ABSOLUTE = 0x0000,
+  IMAGE_REL_ARM_ADDR32 = 0x0001,
+  IMAGE_REL_ARM_ADDR32NB = 0x0002,
+  IMAGE_REL_ARM_BRANCH24 = 0x0003,
+  IMAGE_REL_ARM_BRANCH11 = 0x0004,
+  IMAGE_REL_ARM_TOKEN = 0x0005,
+  IMAGE_REL_ARM_BLX24 = 0x0008,
+  IMAGE_REL_ARM_BLX11 = 0x0009,
+  IMAGE_REL_ARM_SECTION = 0x000E,
+  IMAGE_REL_ARM_SECREL = 0x000F,
+  IMAGE_REL_ARM_MOV32A = 0x0010,
+  IMAGE_REL_ARM_MOV32T = 0x0011,
+  IMAGE_REL_ARM_BRANCH20T = 0x0012,
+  IMAGE_REL_ARM_BRANCH24T = 0x0014,
+  IMAGE_REL_ARM_BLX23T = 0x0015
+};
+
+enum RelocationTypesARM64 {
+  IMAGE_REL_ARM64_ABSOLUTE = 0x0000,
+  IMAGE_REL_ARM64_ADDR32 = 0x0001,
+  IMAGE_REL_ARM64_ADDR32NB = 0x0002,
+  IMAGE_REL_ARM64_BRANCH26 = 0x0003,
+  IMAGE_REL_ARM64_PAGEBASE_REL2 = 0x0004,
+  IMAGE_REL_ARM64_REL21 = 0x0005,
+  IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006,
+  IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007,
+  IMAGE_REL_ARM64_SECREL = 0x0008,
+  IMAGE_REL_ARM64_SECREL_LOW12A = 0x0009,
+  IMAGE_REL_ARM64_SECREL_HIGH12A = 0x000A,
+  IMAGE_REL_ARM64_SECREL_LOW12L = 0x000B,
+  IMAGE_REL_ARM64_TOKEN = 0x000C,
+  IMAGE_REL_ARM64_SECTION = 0x000D,
+  IMAGE_REL_ARM64_ADDR64 = 0x000E,
+  IMAGE_REL_ARM64_BRANCH19 = 0x000F,
+  IMAGE_REL_ARM64_BRANCH14 = 0x0010,
+};
+
+enum COMDATType {
+  IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
+  IMAGE_COMDAT_SELECT_ANY,
+  IMAGE_COMDAT_SELECT_SAME_SIZE,
+  IMAGE_COMDAT_SELECT_EXACT_MATCH,
+  IMAGE_COMDAT_SELECT_ASSOCIATIVE,
+  IMAGE_COMDAT_SELECT_LARGEST,
+  IMAGE_COMDAT_SELECT_NEWEST
+};
+
+// Auxiliary Symbol Formats
+struct AuxiliaryFunctionDefinition {
+  uint32_t TagIndex;
+  uint32_t TotalSize;
+  uint32_t PointerToLinenumber;
+  uint32_t PointerToNextFunction;
+  char unused[2];
+};
+
+struct AuxiliarybfAndefSymbol {
+  uint8_t unused1[4];
+  uint16_t Linenumber;
+  uint8_t unused2[6];
+  uint32_t PointerToNextFunction;
+  uint8_t unused3[2];
+};
+
+struct AuxiliaryWeakExternal {
+  uint32_t TagIndex;
+  uint32_t Characteristics;
+  uint8_t unused[10];
+};
+
+enum WeakExternalCharacteristics {
+  IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1,
+  IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2,
+  IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3
+};
+
+struct AuxiliarySectionDefinition {
+  uint32_t Length;
+  uint16_t NumberOfRelocations;
+  uint16_t NumberOfLinenumbers;
+  uint32_t CheckSum;
+  uint32_t Number;
+  uint8_t Selection;
+  char unused;
+};
+
+struct AuxiliaryCLRToken {
+  uint8_t AuxType;
+  uint8_t unused1;
+  uint32_t SymbolTableIndex;
+  char unused2[12];
+};
+
+union Auxiliary {
+  AuxiliaryFunctionDefinition FunctionDefinition;
+  AuxiliarybfAndefSymbol bfAndefSymbol;
+  AuxiliaryWeakExternal WeakExternal;
+  AuxiliarySectionDefinition SectionDefinition;
+};
+
+/// @brief The Import Directory Table.
+///
+/// There is a single array of these and one entry per imported DLL.
+struct ImportDirectoryTableEntry {
+  uint32_t ImportLookupTableRVA;
+  uint32_t TimeDateStamp;
+  uint32_t ForwarderChain;
+  uint32_t NameRVA;
+  uint32_t ImportAddressTableRVA;
+};
+
+/// @brief The PE32 Import Lookup Table.
+///
+/// There is an array of these for each imported DLL. It represents either
+/// the ordinal to import from the target DLL, or a name to lookup and import
+/// from the target DLL.
+///
+/// This also happens to be the same format used by the Import Address Table
+/// when it is initially written out to the image.
+struct ImportLookupTableEntry32 {
+  uint32_t data;
+
+  /// @brief Is this entry specified by ordinal, or name?
+  bool isOrdinal() const { return data & 0x80000000; }
+
+  /// @brief Get the ordinal value of this entry. isOrdinal must be true.
+  uint16_t getOrdinal() const {
+    assert(isOrdinal() && "ILT entry is not an ordinal!");
+    return data & 0xFFFF;
+  }
+
+  /// @brief Set the ordinal value and set isOrdinal to true.
+  void setOrdinal(uint16_t o) {
+    data = o;
+    data |= 0x80000000;
+  }
+
+  /// @brief Get the Hint/Name entry RVA. isOrdinal must be false.
+  uint32_t getHintNameRVA() const {
+    assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!");
+    return data;
+  }
+
+  /// @brief Set the Hint/Name entry RVA and set isOrdinal to false.
+  void setHintNameRVA(uint32_t rva) { data = rva; }
+};
+
+/// @brief The DOS compatible header at the front of all PEs.
+struct DOSHeader {
+  uint16_t Magic;
+  uint16_t UsedBytesInTheLastPage;
+  uint16_t FileSizeInPages;
+  uint16_t NumberOfRelocationItems;
+  uint16_t HeaderSizeInParagraphs;
+  uint16_t MinimumExtraParagraphs;
+  uint16_t MaximumExtraParagraphs;
+  uint16_t InitialRelativeSS;
+  uint16_t InitialSP;
+  uint16_t Checksum;
+  uint16_t InitialIP;
+  uint16_t InitialRelativeCS;
+  uint16_t AddressOfRelocationTable;
+  uint16_t OverlayNumber;
+  uint16_t Reserved[4];
+  uint16_t OEMid;
+  uint16_t OEMinfo;
+  uint16_t Reserved2[10];
+  uint32_t AddressOfNewExeHeader;
+};
+
+struct PE32Header {
+  enum { PE32 = 0x10b, PE32_PLUS = 0x20b };
+
+  uint16_t Magic;
+  uint8_t MajorLinkerVersion;
+  uint8_t MinorLinkerVersion;
+  uint32_t SizeOfCode;
+  uint32_t SizeOfInitializedData;
+  uint32_t SizeOfUninitializedData;
+  uint32_t AddressOfEntryPoint; // RVA
+  uint32_t BaseOfCode;          // RVA
+  uint32_t BaseOfData;          // RVA
+  uint32_t ImageBase;
+  uint32_t SectionAlignment;
+  uint32_t FileAlignment;
+  uint16_t MajorOperatingSystemVersion;
+  uint16_t MinorOperatingSystemVersion;
+  uint16_t MajorImageVersion;
+  uint16_t MinorImageVersion;
+  uint16_t MajorSubsystemVersion;
+  uint16_t MinorSubsystemVersion;
+  uint32_t Win32VersionValue;
+  uint32_t SizeOfImage;
+  uint32_t SizeOfHeaders;
+  uint32_t CheckSum;
+  uint16_t Subsystem;
+  // FIXME: This should be DllCharacteristics to match the COFF spec.
+  uint16_t DLLCharacteristics;
+  uint32_t SizeOfStackReserve;
+  uint32_t SizeOfStackCommit;
+  uint32_t SizeOfHeapReserve;
+  uint32_t SizeOfHeapCommit;
+  uint32_t LoaderFlags;
+  // FIXME: This should be NumberOfRvaAndSizes to match the COFF spec.
+  uint32_t NumberOfRvaAndSize;
+};
+
+struct DataDirectory {
+  uint32_t RelativeVirtualAddress;
+  uint32_t Size;
+};
+
+enum DataDirectoryIndex {
+  EXPORT_TABLE = 0,
+  IMPORT_TABLE,
+  RESOURCE_TABLE,
+  EXCEPTION_TABLE,
+  CERTIFICATE_TABLE,
+  BASE_RELOCATION_TABLE,
+  DEBUG_DIRECTORY,
+  ARCHITECTURE,
+  GLOBAL_PTR,
+  TLS_TABLE,
+  LOAD_CONFIG_TABLE,
+  BOUND_IMPORT,
+  IAT,
+  DELAY_IMPORT_DESCRIPTOR,
+  CLR_RUNTIME_HEADER,
+
+  NUM_DATA_DIRECTORIES
+};
+
+enum WindowsSubsystem {
+  IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem.
+  IMAGE_SUBSYSTEM_NATIVE = 1,  ///< Device drivers and native Windows processes
+  IMAGE_SUBSYSTEM_WINDOWS_GUI = 2,      ///< The Windows GUI subsystem.
+  IMAGE_SUBSYSTEM_WINDOWS_CUI = 3,      ///< The Windows character subsystem.
+  IMAGE_SUBSYSTEM_OS2_CUI = 5,          ///< The OS/2 character subsystem.
+  IMAGE_SUBSYSTEM_POSIX_CUI = 7,        ///< The POSIX character subsystem.
+  IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8,   ///< Native Windows 9x driver.
+  IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9,   ///< Windows CE.
+  IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application.
+  IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot
+                                                ///  services.
+  IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12,      ///< An EFI driver with run-time
+                                                ///  services.
+  IMAGE_SUBSYSTEM_EFI_ROM = 13,                 ///< An EFI ROM image.
+  IMAGE_SUBSYSTEM_XBOX = 14,                    ///< XBOX.
+  IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application.
+};
+
+enum DLLCharacteristics {
+  /// ASLR with 64 bit address space.
+  IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020,
+  /// DLL can be relocated at load time.
+  IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040,
+  /// Code integrity checks are enforced.
+  IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080,
+  /// Image is NX compatible.
+  IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100,
+  /// Isolation aware, but do not isolate the image.
+  IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200,
+  /// Does not use structured exception handling (SEH). No SEH handler may be
+  /// called in this image.
+  IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400,
+  /// Do not bind the image.
+  IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800,
+  /// Image should execute in an AppContainer.
+  IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000,
+  /// A WDM driver.
+  IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000,
+  /// Image supports Control Flow Guard.
+  IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000,
+  /// Terminal Server aware.
+  IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000
+};
+
+enum DebugType {
+  IMAGE_DEBUG_TYPE_UNKNOWN = 0,
+  IMAGE_DEBUG_TYPE_COFF = 1,
+  IMAGE_DEBUG_TYPE_CODEVIEW = 2,
+  IMAGE_DEBUG_TYPE_FPO = 3,
+  IMAGE_DEBUG_TYPE_MISC = 4,
+  IMAGE_DEBUG_TYPE_EXCEPTION = 5,
+  IMAGE_DEBUG_TYPE_FIXUP = 6,
+  IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7,
+  IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8,
+  IMAGE_DEBUG_TYPE_BORLAND = 9,
+  IMAGE_DEBUG_TYPE_RESERVED10 = 10,
+  IMAGE_DEBUG_TYPE_CLSID = 11,
+  IMAGE_DEBUG_TYPE_VC_FEATURE = 12,
+  IMAGE_DEBUG_TYPE_POGO = 13,
+  IMAGE_DEBUG_TYPE_ILTCG = 14,
+  IMAGE_DEBUG_TYPE_MPX = 15,
+  IMAGE_DEBUG_TYPE_REPRO = 16,
+};
+
+enum BaseRelocationType {
+  IMAGE_REL_BASED_ABSOLUTE = 0,
+  IMAGE_REL_BASED_HIGH = 1,
+  IMAGE_REL_BASED_LOW = 2,
+  IMAGE_REL_BASED_HIGHLOW = 3,
+  IMAGE_REL_BASED_HIGHADJ = 4,
+  IMAGE_REL_BASED_MIPS_JMPADDR = 5,
+  IMAGE_REL_BASED_ARM_MOV32A = 5,
+  IMAGE_REL_BASED_ARM_MOV32T = 7,
+  IMAGE_REL_BASED_MIPS_JMPADDR16 = 9,
+  IMAGE_REL_BASED_DIR64 = 10
+};
+
+enum ImportType { IMPORT_CODE = 0, IMPORT_DATA = 1, IMPORT_CONST = 2 };
+
+enum ImportNameType {
+  /// Import is by ordinal. This indicates that the value in the Ordinal/Hint
+  /// field of the import header is the import's ordinal. If this constant is
+  /// not specified, then the Ordinal/Hint field should always be interpreted
+  /// as the import's hint.
+  IMPORT_ORDINAL = 0,
+  /// The import name is identical to the public symbol name
+  IMPORT_NAME = 1,
+  /// The import name is the public symbol name, but skipping the leading ?,
+  /// @, or optionally _.
+  IMPORT_NAME_NOPREFIX = 2,
+  /// The import name is the public symbol name, but skipping the leading ?,
+  /// @, or optionally _, and truncating at the first @.
+  IMPORT_NAME_UNDECORATE = 3
+};
+
+struct ImportHeader {
+  uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0).
+  uint16_t Sig2; ///< Must be 0xFFFF.
+  uint16_t Version;
+  uint16_t Machine;
+  uint32_t TimeDateStamp;
+  uint32_t SizeOfData;
+  uint16_t OrdinalHint;
+  uint16_t TypeInfo;
+
+  ImportType getType() const { return static_cast<ImportType>(TypeInfo & 0x3); }
+
+  ImportNameType getNameType() const {
+    return static_cast<ImportNameType>((TypeInfo & 0x1C) >> 2);
+  }
+};
+
+enum CodeViewIdentifiers {
+  DEBUG_SECTION_MAGIC = 0x4,
+};
+
+inline bool isReservedSectionNumber(int32_t SectionNumber) {
+  return SectionNumber <= 0;
+}
+
+} // End namespace COFF.
+} // End namespace llvm.
+
+#endif
diff --git a/include/llvm/Support/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def
similarity index 100%
rename from include/llvm/Support/Dwarf.def
rename to include/llvm/BinaryFormat/Dwarf.def
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h
similarity index 92%
rename from include/llvm/Support/Dwarf.h
rename to include/llvm/BinaryFormat/Dwarf.h
index 3061b7b5fa0f..b7a056b18119 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/BinaryFormat/Dwarf.h
@@ -1,4 +1,4 @@
-//===-- llvm/Support/Dwarf.h ---Dwarf Constants------------------*- C++ -*-===//
+//===-- llvm/BinaryFormat/Dwarf.h ---Dwarf Constants-------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,8 +17,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_SUPPORT_DWARF_H
-#define LLVM_SUPPORT_DWARF_H
+#ifndef LLVM_BINARYFORMAT_DWARF_H
+#define LLVM_BINARYFORMAT_DWARF_H
 
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
@@ -37,7 +37,7 @@ namespace dwarf {
 // enumeration base type.
 
 enum LLVMConstants : uint32_t {
-  // LLVM mock tags (see also llvm/Support/Dwarf.def).
+  // LLVM mock tags (see also llvm/BinaryFormat/Dwarf.def).
   DW_TAG_invalid = ~0U,        // Tag for invalid results.
   DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results.
   DW_MACINFO_invalid = ~0U,    // Macinfo type for invalid results.
@@ -48,7 +48,7 @@ enum LLVMConstants : uint32_t {
   DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames.
   DW_ARANGES_VERSION = 2,  // Section version number for .debug_aranges.
   // Identifiers we use to distinguish vendor extensions.
-  DWARF_VENDOR_DWARF = 0,  // Defined in v2 or later of the DWARF standard.
+  DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard.
   DWARF_VENDOR_APPLE = 1,
   DWARF_VENDOR_BORLAND = 2,
   DWARF_VENDOR_GNU = 3,
@@ -64,7 +64,7 @@ const uint64_t DW64_CIE_ID = UINT64_MAX;
 
 enum Tag : uint16_t {
 #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_TAG_lo_user = 0x4080,
   DW_TAG_hi_user = 0xffff,
   DW_TAG_user_base = 0x1000 // Recommended base for user tags.
@@ -101,20 +101,20 @@ inline bool isType(Tag T) {
 /// Attributes.
 enum Attribute : uint16_t {
 #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) DW_AT_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_AT_lo_user = 0x2000,
   DW_AT_hi_user = 0x3fff,
 };
 
 enum Form : uint16_t {
 #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) DW_FORM_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
- DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF.
+#include "llvm/BinaryFormat/Dwarf.def"
+  DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF.
 };
 
 enum LocationAtom {
 #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) DW_OP_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_OP_lo_user = 0xe0,
   DW_OP_hi_user = 0xff,
   DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata.
@@ -122,7 +122,7 @@ enum LocationAtom {
 
 enum TypeKind {
 #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_ATE_lo_user = 0x80,
   DW_ATE_hi_user = 0xff
 };
@@ -161,19 +161,19 @@ enum VisibilityAttribute {
 
 enum VirtualityAttribute {
 #define HANDLE_DW_VIRTUALITY(ID, NAME) DW_VIRTUALITY_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_VIRTUALITY_max = 0x02
 };
 
 enum DefaultedMemberAttribute {
 #define HANDLE_DW_DEFAULTED(ID, NAME) DW_DEFAULTED_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_DEFAULTED_max = 0x02
 };
 
 enum SourceLanguage {
 #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_LANG_lo_user = 0x8000,
   DW_LANG_hi_user = 0xffff
 };
@@ -187,9 +187,9 @@ enum CaseSensitivity {
 };
 
 enum CallingConvention {
-  // Calling convention codes
+// Calling convention codes
 #define HANDLE_DW_CC(ID, NAME) DW_CC_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_CC_lo_user = 0x40,
   DW_CC_hi_user = 0xff
 };
@@ -217,20 +217,20 @@ enum DiscriminantList {
 /// Line Number Standard Opcode Encodings.
 enum LineNumberOps : uint8_t {
 #define HANDLE_DW_LNS(ID, NAME) DW_LNS_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
 };
 
 /// Line Number Extended Opcode Encodings.
 enum LineNumberExtendedOps {
 #define HANDLE_DW_LNE(ID, NAME) DW_LNE_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_LNE_lo_user = 0x80,
   DW_LNE_hi_user = 0xff
 };
 
 enum LineNumberEntryFormat {
 #define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_LNCT_lo_user = 0x2000,
   DW_LNCT_hi_user = 0x3fff,
 };
@@ -247,7 +247,7 @@ enum MacinfoRecordType {
 /// DWARF v5 macro information entry type encodings.
 enum MacroEntryType {
 #define HANDLE_DW_MACRO(ID, NAME) DW_MACRO_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_MACRO_lo_user = 0xe0,
   DW_MACRO_hi_user = 0xff
 };
@@ -255,14 +255,13 @@ enum MacroEntryType {
 /// DWARF v5 range list entry encoding values.
 enum RangeListEntries {
 #define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
 };
 
-
 /// Call frame instruction encodings.
 enum CallFrameInfo {
 #define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_CFA_extended = 0x00,
 
   DW_CFA_lo_user = 0x1c,
@@ -310,13 +309,13 @@ enum LocationListEntry : unsigned char {
 /// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind!
 enum ApplePropertyAttributes {
 #define HANDLE_DW_APPLE_PROPERTY(ID, NAME) DW_APPLE_PROPERTY_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
 };
 
 /// Constants for unit types in DWARF v5.
 enum UnitType : unsigned char {
 #define HANDLE_DW_UT(ID, NAME) DW_UT_##NAME = ID,
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   DW_UT_lo_user = 0x80,
   DW_UT_hi_user = 0xff
 };
@@ -355,10 +354,7 @@ enum GDBIndexEntryKind {
   GIEK_UNUSED7
 };
 
-enum GDBIndexEntryLinkage {
-  GIEL_EXTERNAL,
-  GIEL_STATIC
-};
+enum GDBIndexEntryLinkage { GIEL_EXTERNAL, GIEL_STATIC };
 
 /// \defgroup DwarfConstantsDumping Dwarf constants dumping functions
 ///
@@ -470,8 +466,8 @@ struct PubIndexEntryDescriptor {
   /* implicit */ PubIndexEntryDescriptor(GDBIndexEntryKind Kind)
       : Kind(Kind), Linkage(GIEL_EXTERNAL) {}
   explicit PubIndexEntryDescriptor(uint8_t Value)
-      : Kind(static_cast<GDBIndexEntryKind>((Value & KIND_MASK) >>
-                                            KIND_OFFSET)),
+      : Kind(
+            static_cast<GDBIndexEntryKind>((Value & KIND_MASK) >> KIND_OFFSET)),
         Linkage(static_cast<GDBIndexEntryLinkage>((Value & LINKAGE_MASK) >>
                                                   LINKAGE_OFFSET)) {}
   uint8_t toBits() const {
diff --git a/include/llvm/Support/ELF.h b/include/llvm/BinaryFormat/ELF.h
similarity index 98%
rename from include/llvm/Support/ELF.h
rename to include/llvm/BinaryFormat/ELF.h
index 33f20a809d6c..3724f555c283 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/BinaryFormat/ELF.h
@@ -1,4 +1,4 @@
-//===-- llvm/Support/ELF.h - ELF constants and data structures --*- C++ -*-===//
+//===-- llvm/BinaryFormat/ELF.h - ELF constants and structures --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,8 +17,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_SUPPORT_ELF_H
-#define LLVM_SUPPORT_ELF_H
+#ifndef LLVM_BINARYFORMAT_ELF_H
+#define LLVM_BINARYFORMAT_ELF_H
 
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
@@ -808,12 +808,7 @@ enum : unsigned {
   SHF_MIPS_STRING = 0x80000000,
 
   // Make code section unreadable when in execute-only mode
-  SHF_ARM_PURECODE = 0x20000000,
-
-  SHF_AMDGPU_HSA_GLOBAL = 0x00100000,
-  SHF_AMDGPU_HSA_READONLY = 0x00200000,
-  SHF_AMDGPU_HSA_CODE = 0x00400000,
-  SHF_AMDGPU_HSA_AGENT = 0x00800000
+  SHF_ARM_PURECODE = 0x20000000
 };
 
 // Section Group Flags
@@ -897,9 +892,7 @@ enum {
   STT_HIPROC = 15,    // Highest processor-specific symbol type
 
   // AMDGPU symbol types
-  STT_AMDGPU_HSA_KERNEL = 10,
-  STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11,
-  STT_AMDGPU_HSA_METADATA = 12
+  STT_AMDGPU_HSA_KERNEL = 10
 };
 
 enum {
@@ -1050,12 +1043,6 @@ enum {
   PT_MIPS_OPTIONS = 0x70000002,  // Options segment.
   PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment.
 
-  // AMDGPU program header types.
-  PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000,
-  PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001,
-  PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002,
-  PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003,
-
   // WebAssembly program header types.
   PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions.
 };
diff --git a/include/llvm/Support/ELFRelocs/AArch64.def b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/AArch64.def
rename to include/llvm/BinaryFormat/ELFRelocs/AArch64.def
diff --git a/include/llvm/Support/ELFRelocs/AMDGPU.def b/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/AMDGPU.def
rename to include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def
diff --git a/include/llvm/Support/ELFRelocs/ARM.def b/include/llvm/BinaryFormat/ELFRelocs/ARM.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/ARM.def
rename to include/llvm/BinaryFormat/ELFRelocs/ARM.def
diff --git a/include/llvm/Support/ELFRelocs/AVR.def b/include/llvm/BinaryFormat/ELFRelocs/AVR.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/AVR.def
rename to include/llvm/BinaryFormat/ELFRelocs/AVR.def
diff --git a/include/llvm/Support/ELFRelocs/BPF.def b/include/llvm/BinaryFormat/ELFRelocs/BPF.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/BPF.def
rename to include/llvm/BinaryFormat/ELFRelocs/BPF.def
diff --git a/include/llvm/Support/ELFRelocs/Hexagon.def b/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/Hexagon.def
rename to include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
diff --git a/include/llvm/Support/ELFRelocs/Lanai.def b/include/llvm/BinaryFormat/ELFRelocs/Lanai.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/Lanai.def
rename to include/llvm/BinaryFormat/ELFRelocs/Lanai.def
diff --git a/include/llvm/Support/ELFRelocs/Mips.def b/include/llvm/BinaryFormat/ELFRelocs/Mips.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/Mips.def
rename to include/llvm/BinaryFormat/ELFRelocs/Mips.def
diff --git a/include/llvm/Support/ELFRelocs/PowerPC.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/PowerPC.def
rename to include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
diff --git a/include/llvm/Support/ELFRelocs/PowerPC64.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/PowerPC64.def
rename to include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
diff --git a/include/llvm/Support/ELFRelocs/RISCV.def b/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/RISCV.def
rename to include/llvm/BinaryFormat/ELFRelocs/RISCV.def
diff --git a/include/llvm/Support/ELFRelocs/Sparc.def b/include/llvm/BinaryFormat/ELFRelocs/Sparc.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/Sparc.def
rename to include/llvm/BinaryFormat/ELFRelocs/Sparc.def
diff --git a/include/llvm/Support/ELFRelocs/SystemZ.def b/include/llvm/BinaryFormat/ELFRelocs/SystemZ.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/SystemZ.def
rename to include/llvm/BinaryFormat/ELFRelocs/SystemZ.def
diff --git a/include/llvm/Support/ELFRelocs/WebAssembly.def b/include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/WebAssembly.def
rename to include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def
diff --git a/include/llvm/Support/ELFRelocs/i386.def b/include/llvm/BinaryFormat/ELFRelocs/i386.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/i386.def
rename to include/llvm/BinaryFormat/ELFRelocs/i386.def
diff --git a/include/llvm/Support/ELFRelocs/x86_64.def b/include/llvm/BinaryFormat/ELFRelocs/x86_64.def
similarity index 100%
rename from include/llvm/Support/ELFRelocs/x86_64.def
rename to include/llvm/BinaryFormat/ELFRelocs/x86_64.def
diff --git a/include/llvm/Support/MachO.def b/include/llvm/BinaryFormat/MachO.def
similarity index 100%
rename from include/llvm/Support/MachO.def
rename to include/llvm/BinaryFormat/MachO.def
diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h
new file mode 100644
index 000000000000..8ab6dde800c2
--- /dev/null
+++ b/include/llvm/BinaryFormat/MachO.h
@@ -0,0 +1,1984 @@
+//===-- llvm/BinaryFormat/MachO.h - The MachO file format -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines manifest constants for the MachO object file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MACHO_H
+#define LLVM_BINARYFORMAT_MACHO_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Host.h"
+
+namespace llvm {
+namespace MachO {
+// Enums from <mach-o/loader.h>
+enum : uint32_t {
+  // Constants for the "magic" field in llvm::MachO::mach_header and
+  // llvm::MachO::mach_header_64
+  MH_MAGIC = 0xFEEDFACEu,
+  MH_CIGAM = 0xCEFAEDFEu,
+  MH_MAGIC_64 = 0xFEEDFACFu,
+  MH_CIGAM_64 = 0xCFFAEDFEu,
+  FAT_MAGIC = 0xCAFEBABEu,
+  FAT_CIGAM = 0xBEBAFECAu,
+  FAT_MAGIC_64 = 0xCAFEBABFu,
+  FAT_CIGAM_64 = 0xBFBAFECAu
+};
+
+enum HeaderFileType {
+  // Constants for the "filetype" field in llvm::MachO::mach_header and
+  // llvm::MachO::mach_header_64
+  MH_OBJECT = 0x1u,
+  MH_EXECUTE = 0x2u,
+  MH_FVMLIB = 0x3u,
+  MH_CORE = 0x4u,
+  MH_PRELOAD = 0x5u,
+  MH_DYLIB = 0x6u,
+  MH_DYLINKER = 0x7u,
+  MH_BUNDLE = 0x8u,
+  MH_DYLIB_STUB = 0x9u,
+  MH_DSYM = 0xAu,
+  MH_KEXT_BUNDLE = 0xBu
+};
+
+enum {
+  // Constant bits for the "flags" field in llvm::MachO::mach_header and
+  // llvm::MachO::mach_header_64
+  MH_NOUNDEFS = 0x00000001u,
+  MH_INCRLINK = 0x00000002u,
+  MH_DYLDLINK = 0x00000004u,
+  MH_BINDATLOAD = 0x00000008u,
+  MH_PREBOUND = 0x00000010u,
+  MH_SPLIT_SEGS = 0x00000020u,
+  MH_LAZY_INIT = 0x00000040u,
+  MH_TWOLEVEL = 0x00000080u,
+  MH_FORCE_FLAT = 0x00000100u,
+  MH_NOMULTIDEFS = 0x00000200u,
+  MH_NOFIXPREBINDING = 0x00000400u,
+  MH_PREBINDABLE = 0x00000800u,
+  MH_ALLMODSBOUND = 0x00001000u,
+  MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000u,
+  MH_CANONICAL = 0x00004000u,
+  MH_WEAK_DEFINES = 0x00008000u,
+  MH_BINDS_TO_WEAK = 0x00010000u,
+  MH_ALLOW_STACK_EXECUTION = 0x00020000u,
+  MH_ROOT_SAFE = 0x00040000u,
+  MH_SETUID_SAFE = 0x00080000u,
+  MH_NO_REEXPORTED_DYLIBS = 0x00100000u,
+  MH_PIE = 0x00200000u,
+  MH_DEAD_STRIPPABLE_DYLIB = 0x00400000u,
+  MH_HAS_TLV_DESCRIPTORS = 0x00800000u,
+  MH_NO_HEAP_EXECUTION = 0x01000000u,
+  MH_APP_EXTENSION_SAFE = 0x02000000u
+};
+
+enum : uint32_t {
+  // Flags for the "cmd" field in llvm::MachO::load_command
+  LC_REQ_DYLD = 0x80000000u
+};
+
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) LCName = LCValue,
+
+enum LoadCommandType : uint32_t {
+#include "llvm/BinaryFormat/MachO.def"
+};
+
+#undef HANDLE_LOAD_COMMAND
+
+enum : uint32_t {
+  // Constant bits for the "flags" field in llvm::MachO::segment_command
+  SG_HIGHVM = 0x1u,
+  SG_FVMLIB = 0x2u,
+  SG_NORELOC = 0x4u,
+  SG_PROTECTED_VERSION_1 = 0x8u,
+
+  // Constant masks for the "flags" field in llvm::MachO::section and
+  // llvm::MachO::section_64
+  SECTION_TYPE = 0x000000ffu,           // SECTION_TYPE
+  SECTION_ATTRIBUTES = 0xffffff00u,     // SECTION_ATTRIBUTES
+  SECTION_ATTRIBUTES_USR = 0xff000000u, // SECTION_ATTRIBUTES_USR
+  SECTION_ATTRIBUTES_SYS = 0x00ffff00u  // SECTION_ATTRIBUTES_SYS
+};
+
+/// These are the section type and attributes fields.  A MachO section can
+/// have only one Type, but can have any of the attributes specified.
+enum SectionType : uint32_t {
+  // Constant masks for the "flags[7:0]" field in llvm::MachO::section and
+  // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE)
+
+  /// S_REGULAR - Regular section.
+  S_REGULAR = 0x00u,
+  /// S_ZEROFILL - Zero fill on demand section.
+  S_ZEROFILL = 0x01u,
+  /// S_CSTRING_LITERALS - Section with literal C strings.
+  S_CSTRING_LITERALS = 0x02u,
+  /// S_4BYTE_LITERALS - Section with 4 byte literals.
+  S_4BYTE_LITERALS = 0x03u,
+  /// S_8BYTE_LITERALS - Section with 8 byte literals.
+  S_8BYTE_LITERALS = 0x04u,
+  /// S_LITERAL_POINTERS - Section with pointers to literals.
+  S_LITERAL_POINTERS = 0x05u,
+  /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers.
+  S_NON_LAZY_SYMBOL_POINTERS = 0x06u,
+  /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers.
+  S_LAZY_SYMBOL_POINTERS = 0x07u,
+  /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in
+  /// the Reserved2 field.
+  S_SYMBOL_STUBS = 0x08u,
+  /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for
+  /// initialization.
+  S_MOD_INIT_FUNC_POINTERS = 0x09u,
+  /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for
+  /// termination.
+  S_MOD_TERM_FUNC_POINTERS = 0x0au,
+  /// S_COALESCED - Section contains symbols that are to be coalesced.
+  S_COALESCED = 0x0bu,
+  /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4
+  /// gigabytes).
+  S_GB_ZEROFILL = 0x0cu,
+  /// S_INTERPOSING - Section with only pairs of function pointers for
+  /// interposing.
+  S_INTERPOSING = 0x0du,
+  /// S_16BYTE_LITERALS - Section with only 16 byte literals.
+  S_16BYTE_LITERALS = 0x0eu,
+  /// S_DTRACE_DOF - Section contains DTrace Object Format.
+  S_DTRACE_DOF = 0x0fu,
+  /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to
+  /// lazy loaded dylibs.
+  S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10u,
+  /// S_THREAD_LOCAL_REGULAR - Thread local data section.
+  S_THREAD_LOCAL_REGULAR = 0x11u,
+  /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section.
+  S_THREAD_LOCAL_ZEROFILL = 0x12u,
+  /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable
+  /// structure data.
+  S_THREAD_LOCAL_VARIABLES = 0x13u,
+  /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with pointers to thread
+  /// local structures.
+  S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14u,
+  /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local
+  /// variable initialization pointers to functions.
+  S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u,
+
+  LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS
+};
+
+enum : uint32_t {
+  // Constant masks for the "flags[31:24]" field in llvm::MachO::section and
+  // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR)
+
+  /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine
+  /// instructions.
+  S_ATTR_PURE_INSTRUCTIONS = 0x80000000u,
+  /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be
+  /// in a ranlib table of contents.
+  S_ATTR_NO_TOC = 0x40000000u,
+  /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section
+  /// in files with the MH_DYLDLINK flag.
+  S_ATTR_STRIP_STATIC_SYMS = 0x20000000u,
+  /// S_ATTR_NO_DEAD_STRIP - No dead stripping.
+  S_ATTR_NO_DEAD_STRIP = 0x10000000u,
+  /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks.
+  S_ATTR_LIVE_SUPPORT = 0x08000000u,
+  /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by
+  /// dyld.
+  S_ATTR_SELF_MODIFYING_CODE = 0x04000000u,
+  /// S_ATTR_DEBUG - A debug section.
+  S_ATTR_DEBUG = 0x02000000u,
+
+  // Constant masks for the "flags[23:8]" field in llvm::MachO::section and
+  // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS)
+
+  /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions.
+  S_ATTR_SOME_INSTRUCTIONS = 0x00000400u,
+  /// S_ATTR_EXT_RELOC - Section has external relocation entries.
+  S_ATTR_EXT_RELOC = 0x00000200u,
+  /// S_ATTR_LOC_RELOC - Section has local relocation entries.
+  S_ATTR_LOC_RELOC = 0x00000100u,
+
+  // Constant masks for the value of an indirect symbol in an indirect
+  // symbol table
+  INDIRECT_SYMBOL_LOCAL = 0x80000000u,
+  INDIRECT_SYMBOL_ABS = 0x40000000u
+};
+
+enum DataRegionType {
+  // Constants for the "kind" field in a data_in_code_entry structure
+  DICE_KIND_DATA = 1u,
+  DICE_KIND_JUMP_TABLE8 = 2u,
+  DICE_KIND_JUMP_TABLE16 = 3u,
+  DICE_KIND_JUMP_TABLE32 = 4u,
+  DICE_KIND_ABS_JUMP_TABLE32 = 5u
+};
+
+enum RebaseType {
+  REBASE_TYPE_POINTER = 1u,
+  REBASE_TYPE_TEXT_ABSOLUTE32 = 2u,
+  REBASE_TYPE_TEXT_PCREL32 = 3u
+};
+
+enum { REBASE_OPCODE_MASK = 0xF0u, REBASE_IMMEDIATE_MASK = 0x0Fu };
+
+enum RebaseOpcode {
+  REBASE_OPCODE_DONE = 0x00u,
+  REBASE_OPCODE_SET_TYPE_IMM = 0x10u,
+  REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x20u,
+  REBASE_OPCODE_ADD_ADDR_ULEB = 0x30u,
+  REBASE_OPCODE_ADD_ADDR_IMM_SCALED = 0x40u,
+  REBASE_OPCODE_DO_REBASE_IMM_TIMES = 0x50u,
+  REBASE_OPCODE_DO_REBASE_ULEB_TIMES = 0x60u,
+  REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB = 0x70u,
+  REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80u
+};
+
+enum BindType {
+  BIND_TYPE_POINTER = 1u,
+  BIND_TYPE_TEXT_ABSOLUTE32 = 2u,
+  BIND_TYPE_TEXT_PCREL32 = 3u
+};
+
+enum BindSpecialDylib {
+  BIND_SPECIAL_DYLIB_SELF = 0,
+  BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
+  BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2
+};
+
+enum {
+  BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x1u,
+  BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8u,
+
+  BIND_OPCODE_MASK = 0xF0u,
+  BIND_IMMEDIATE_MASK = 0x0Fu
+};
+
+enum BindOpcode {
+  BIND_OPCODE_DONE = 0x00u,
+  BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10u,
+  BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20u,
+  BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30u,
+  BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40u,
+  BIND_OPCODE_SET_TYPE_IMM = 0x50u,
+  BIND_OPCODE_SET_ADDEND_SLEB = 0x60u,
+  BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70u,
+  BIND_OPCODE_ADD_ADDR_ULEB = 0x80u,
+  BIND_OPCODE_DO_BIND = 0x90u,
+  BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0u,
+  BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0u,
+  BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0u
+};
+
+enum {
+  EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u,
+  EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u,
+  EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u,
+  EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u
+};
+
+enum ExportSymbolKind {
+  EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u,
+  EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u,
+  EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u
+};
+
+enum {
+  // Constant masks for the "n_type" field in llvm::MachO::nlist and
+  // llvm::MachO::nlist_64
+  N_STAB = 0xe0,
+  N_PEXT = 0x10,
+  N_TYPE = 0x0e,
+  N_EXT = 0x01
+};
+
+enum NListType : uint8_t {
+  // Constants for the "n_type & N_TYPE" value in llvm::MachO::nlist and
+  // llvm::MachO::nlist_64
+  N_UNDF = 0x0u,
+  N_ABS = 0x2u,
+  N_SECT = 0xeu,
+  N_PBUD = 0xcu,
+  N_INDR = 0xau
+};
+
+enum SectionOrdinal {
+  // Constants for the "n_sect" field in llvm::MachO::nlist and
+  // llvm::MachO::nlist_64
+  NO_SECT = 0u,
+  MAX_SECT = 0xffu
+};
+
+enum {
+  // Constant masks for the "n_desc" field in llvm::MachO::nlist and
+  // llvm::MachO::nlist_64
+  // The low 3 bits are for the REFERENCE_TYPE.
+  REFERENCE_TYPE = 0x7,
+  REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
+  REFERENCE_FLAG_UNDEFINED_LAZY = 1,
+  REFERENCE_FLAG_DEFINED = 2,
+  REFERENCE_FLAG_PRIVATE_DEFINED = 3,
+  REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
+  REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5,
+  // Flag bits (some overlap with the library ordinal bits).
+  N_ARM_THUMB_DEF = 0x0008u,
+  REFERENCED_DYNAMICALLY = 0x0010u,
+  N_NO_DEAD_STRIP = 0x0020u,
+  N_WEAK_REF = 0x0040u,
+  N_WEAK_DEF = 0x0080u,
+  N_SYMBOL_RESOLVER = 0x0100u,
+  N_ALT_ENTRY = 0x0200u,
+  // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL()
+  // as these are in the top 8 bits.
+  SELF_LIBRARY_ORDINAL = 0x0,
+  MAX_LIBRARY_ORDINAL = 0xfd,
+  DYNAMIC_LOOKUP_ORDINAL = 0xfe,
+  EXECUTABLE_ORDINAL = 0xff
+};
+
+enum StabType {
+  // Constant values for the "n_type" field in llvm::MachO::nlist and
+  // llvm::MachO::nlist_64 when "(n_type & N_STAB) != 0"
+  N_GSYM = 0x20u,
+  N_FNAME = 0x22u,
+  N_FUN = 0x24u,
+  N_STSYM = 0x26u,
+  N_LCSYM = 0x28u,
+  N_BNSYM = 0x2Eu,
+  N_PC = 0x30u,
+  N_AST = 0x32u,
+  N_OPT = 0x3Cu,
+  N_RSYM = 0x40u,
+  N_SLINE = 0x44u,
+  N_ENSYM = 0x4Eu,
+  N_SSYM = 0x60u,
+  N_SO = 0x64u,
+  N_OSO = 0x66u,
+  N_LSYM = 0x80u,
+  N_BINCL = 0x82u,
+  N_SOL = 0x84u,
+  N_PARAMS = 0x86u,
+  N_VERSION = 0x88u,
+  N_OLEVEL = 0x8Au,
+  N_PSYM = 0xA0u,
+  N_EINCL = 0xA2u,
+  N_ENTRY = 0xA4u,
+  N_LBRAC = 0xC0u,
+  N_EXCL = 0xC2u,
+  N_RBRAC = 0xE0u,
+  N_BCOMM = 0xE2u,
+  N_ECOMM = 0xE4u,
+  N_ECOML = 0xE8u,
+  N_LENG = 0xFEu
+};
+
+enum : uint32_t {
+  // Constant values for the r_symbolnum field in an
+  // llvm::MachO::relocation_info structure when r_extern is 0.
+  R_ABS = 0,
+
+  // Constant bits for the r_address field in an
+  // llvm::MachO::relocation_info structure.
+  R_SCATTERED = 0x80000000
+};
+
+enum RelocationInfoType {
+  // Constant values for the r_type field in an
+  // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
+  // structure.
+  GENERIC_RELOC_VANILLA = 0,
+  GENERIC_RELOC_PAIR = 1,
+  GENERIC_RELOC_SECTDIFF = 2,
+  GENERIC_RELOC_PB_LA_PTR = 3,
+  GENERIC_RELOC_LOCAL_SECTDIFF = 4,
+  GENERIC_RELOC_TLV = 5,
+
+  // Constant values for the r_type field in a PowerPC architecture
+  // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
+  // structure.
+  PPC_RELOC_VANILLA = GENERIC_RELOC_VANILLA,
+  PPC_RELOC_PAIR = GENERIC_RELOC_PAIR,
+  PPC_RELOC_BR14 = 2,
+  PPC_RELOC_BR24 = 3,
+  PPC_RELOC_HI16 = 4,
+  PPC_RELOC_LO16 = 5,
+  PPC_RELOC_HA16 = 6,
+  PPC_RELOC_LO14 = 7,
+  PPC_RELOC_SECTDIFF = 8,
+  PPC_RELOC_PB_LA_PTR = 9,
+  PPC_RELOC_HI16_SECTDIFF = 10,
+  PPC_RELOC_LO16_SECTDIFF = 11,
+  PPC_RELOC_HA16_SECTDIFF = 12,
+  PPC_RELOC_JBSR = 13,
+  PPC_RELOC_LO14_SECTDIFF = 14,
+  PPC_RELOC_LOCAL_SECTDIFF = 15,
+
+  // Constant values for the r_type field in an ARM architecture
+  // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
+  // structure.
+  ARM_RELOC_VANILLA = GENERIC_RELOC_VANILLA,
+  ARM_RELOC_PAIR = GENERIC_RELOC_PAIR,
+  ARM_RELOC_SECTDIFF = GENERIC_RELOC_SECTDIFF,
+  ARM_RELOC_LOCAL_SECTDIFF = 3,
+  ARM_RELOC_PB_LA_PTR = 4,
+  ARM_RELOC_BR24 = 5,
+  ARM_THUMB_RELOC_BR22 = 6,
+  ARM_THUMB_32BIT_BRANCH = 7, // obsolete
+  ARM_RELOC_HALF = 8,
+  ARM_RELOC_HALF_SECTDIFF = 9,
+
+  // Constant values for the r_type field in an ARM64 architecture
+  // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
+  // structure.
+
+  // For pointers.
+  ARM64_RELOC_UNSIGNED = 0,
+  // Must be followed by an ARM64_RELOC_UNSIGNED
+  ARM64_RELOC_SUBTRACTOR = 1,
+  // A B/BL instruction with 26-bit displacement.
+  ARM64_RELOC_BRANCH26 = 2,
+  // PC-rel distance to page of target.
+  ARM64_RELOC_PAGE21 = 3,
+  // Offset within page, scaled by r_length.
+  ARM64_RELOC_PAGEOFF12 = 4,
+  // PC-rel distance to page of GOT slot.
+  ARM64_RELOC_GOT_LOAD_PAGE21 = 5,
+  // Offset within page of GOT slot, scaled by r_length.
+  ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6,
+  // For pointers to GOT slots.
+  ARM64_RELOC_POINTER_TO_GOT = 7,
+  // PC-rel distance to page of TLVP slot.
+  ARM64_RELOC_TLVP_LOAD_PAGE21 = 8,
+  // Offset within page of TLVP slot, scaled by r_length.
+  ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9,
+  // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12.
+  ARM64_RELOC_ADDEND = 10,
+
+  // Constant values for the r_type field in an x86_64 architecture
+  // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
+  // structure
+  X86_64_RELOC_UNSIGNED = 0,
+  X86_64_RELOC_SIGNED = 1,
+  X86_64_RELOC_BRANCH = 2,
+  X86_64_RELOC_GOT_LOAD = 3,
+  X86_64_RELOC_GOT = 4,
+  X86_64_RELOC_SUBTRACTOR = 5,
+  X86_64_RELOC_SIGNED_1 = 6,
+  X86_64_RELOC_SIGNED_2 = 7,
+  X86_64_RELOC_SIGNED_4 = 8,
+  X86_64_RELOC_TLV = 9
+};
+
+// Values for segment_command.initprot.
+// From <mach/vm_prot.h>
+enum { VM_PROT_READ = 0x1, VM_PROT_WRITE = 0x2, VM_PROT_EXECUTE = 0x4 };
+
+// Values for platform field in build_version_command.
+enum {
+  PLATFORM_MACOS = 1,
+  PLATFORM_IOS = 2,
+  PLATFORM_TVOS = 3,
+  PLATFORM_WATCHOS = 4,
+  PLATFORM_BRIDGEOS = 5
+};
+
+// Values for tools enum in build_tool_version.
+enum { TOOL_CLANG = 1, TOOL_SWIFT = 2, TOOL_LD = 3 };
+
+// Structs from <mach-o/loader.h>
+
+struct mach_header {
+  uint32_t magic;
+  uint32_t cputype;
+  uint32_t cpusubtype;
+  uint32_t filetype;
+  uint32_t ncmds;
+  uint32_t sizeofcmds;
+  uint32_t flags;
+};
+
+struct mach_header_64 {
+  uint32_t magic;
+  uint32_t cputype;
+  uint32_t cpusubtype;
+  uint32_t filetype;
+  uint32_t ncmds;
+  uint32_t sizeofcmds;
+  uint32_t flags;
+  uint32_t reserved;
+};
+
+struct load_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+};
+
+struct segment_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  char segname[16];
+  uint32_t vmaddr;
+  uint32_t vmsize;
+  uint32_t fileoff;
+  uint32_t filesize;
+  uint32_t maxprot;
+  uint32_t initprot;
+  uint32_t nsects;
+  uint32_t flags;
+};
+
+struct segment_command_64 {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  char segname[16];
+  uint64_t vmaddr;
+  uint64_t vmsize;
+  uint64_t fileoff;
+  uint64_t filesize;
+  uint32_t maxprot;
+  uint32_t initprot;
+  uint32_t nsects;
+  uint32_t flags;
+};
+
+struct section {
+  char sectname[16];
+  char segname[16];
+  uint32_t addr;
+  uint32_t size;
+  uint32_t offset;
+  uint32_t align;
+  uint32_t reloff;
+  uint32_t nreloc;
+  uint32_t flags;
+  uint32_t reserved1;
+  uint32_t reserved2;
+};
+
+struct section_64 {
+  char sectname[16];
+  char segname[16];
+  uint64_t addr;
+  uint64_t size;
+  uint32_t offset;
+  uint32_t align;
+  uint32_t reloff;
+  uint32_t nreloc;
+  uint32_t flags;
+  uint32_t reserved1;
+  uint32_t reserved2;
+  uint32_t reserved3;
+};
+
+struct fvmlib {
+  uint32_t name;
+  uint32_t minor_version;
+  uint32_t header_addr;
+};
+
+// The fvmlib_command is obsolete and no longer supported.
+struct fvmlib_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  struct fvmlib fvmlib;
+};
+
+struct dylib {
+  uint32_t name;
+  uint32_t timestamp;
+  uint32_t current_version;
+  uint32_t compatibility_version;
+};
+
+struct dylib_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  struct dylib dylib;
+};
+
+struct sub_framework_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t umbrella;
+};
+
+struct sub_client_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t client;
+};
+
+struct sub_umbrella_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t sub_umbrella;
+};
+
+struct sub_library_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t sub_library;
+};
+
+// The prebound_dylib_command is obsolete and no longer supported.
+struct prebound_dylib_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t name;
+  uint32_t nmodules;
+  uint32_t linked_modules;
+};
+
+struct dylinker_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t name;
+};
+
+struct thread_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+};
+
+struct routines_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t init_address;
+  uint32_t init_module;
+  uint32_t reserved1;
+  uint32_t reserved2;
+  uint32_t reserved3;
+  uint32_t reserved4;
+  uint32_t reserved5;
+  uint32_t reserved6;
+};
+
+struct routines_command_64 {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint64_t init_address;
+  uint64_t init_module;
+  uint64_t reserved1;
+  uint64_t reserved2;
+  uint64_t reserved3;
+  uint64_t reserved4;
+  uint64_t reserved5;
+  uint64_t reserved6;
+};
+
+struct symtab_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t symoff;
+  uint32_t nsyms;
+  uint32_t stroff;
+  uint32_t strsize;
+};
+
+struct dysymtab_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t ilocalsym;
+  uint32_t nlocalsym;
+  uint32_t iextdefsym;
+  uint32_t nextdefsym;
+  uint32_t iundefsym;
+  uint32_t nundefsym;
+  uint32_t tocoff;
+  uint32_t ntoc;
+  uint32_t modtaboff;
+  uint32_t nmodtab;
+  uint32_t extrefsymoff;
+  uint32_t nextrefsyms;
+  uint32_t indirectsymoff;
+  uint32_t nindirectsyms;
+  uint32_t extreloff;
+  uint32_t nextrel;
+  uint32_t locreloff;
+  uint32_t nlocrel;
+};
+
+struct dylib_table_of_contents {
+  uint32_t symbol_index;
+  uint32_t module_index;
+};
+
+struct dylib_module {
+  uint32_t module_name;
+  uint32_t iextdefsym;
+  uint32_t nextdefsym;
+  uint32_t irefsym;
+  uint32_t nrefsym;
+  uint32_t ilocalsym;
+  uint32_t nlocalsym;
+  uint32_t iextrel;
+  uint32_t nextrel;
+  uint32_t iinit_iterm;
+  uint32_t ninit_nterm;
+  uint32_t objc_module_info_addr;
+  uint32_t objc_module_info_size;
+};
+
+struct dylib_module_64 {
+  uint32_t module_name;
+  uint32_t iextdefsym;
+  uint32_t nextdefsym;
+  uint32_t irefsym;
+  uint32_t nrefsym;
+  uint32_t ilocalsym;
+  uint32_t nlocalsym;
+  uint32_t iextrel;
+  uint32_t nextrel;
+  uint32_t iinit_iterm;
+  uint32_t ninit_nterm;
+  uint32_t objc_module_info_size;
+  uint64_t objc_module_info_addr;
+};
+
+struct dylib_reference {
+  uint32_t isym : 24, flags : 8;
+};
+
+// The twolevel_hints_command is obsolete and no longer supported.
+struct twolevel_hints_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t offset;
+  uint32_t nhints;
+};
+
+// The twolevel_hints_command is obsolete and no longer supported.
+struct twolevel_hint {
+  uint32_t isub_image : 8, itoc : 24;
+};
+
+// The prebind_cksum_command is obsolete and no longer supported.
+struct prebind_cksum_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t cksum;
+};
+
+struct uuid_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint8_t uuid[16];
+};
+
+struct rpath_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t path;
+};
+
+struct linkedit_data_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t dataoff;
+  uint32_t datasize;
+};
+
+struct data_in_code_entry {
+  uint32_t offset;
+  uint16_t length;
+  uint16_t kind;
+};
+
+struct source_version_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint64_t version;
+};
+
+struct encryption_info_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t cryptoff;
+  uint32_t cryptsize;
+  uint32_t cryptid;
+};
+
+struct encryption_info_command_64 {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t cryptoff;
+  uint32_t cryptsize;
+  uint32_t cryptid;
+  uint32_t pad;
+};
+
+struct version_min_command {
+  uint32_t cmd;     // LC_VERSION_MIN_MACOSX or
+                    // LC_VERSION_MIN_IPHONEOS
+  uint32_t cmdsize; // sizeof(struct version_min_command)
+  uint32_t version; // X.Y.Z is encoded in nibbles xxxx.yy.zz
+  uint32_t sdk;     // X.Y.Z is encoded in nibbles xxxx.yy.zz
+};
+
+struct note_command {
+  uint32_t cmd;        // LC_NOTE
+  uint32_t cmdsize;    // sizeof(struct note_command)
+  char data_owner[16]; // owner name for this LC_NOTE
+  uint64_t offset;     // file offset of this data
+  uint64_t size;       // length of data region
+};
+
+struct build_tool_version {
+  uint32_t tool;    // enum for the tool
+  uint32_t version; // version of the tool
+};
+
+struct build_version_command {
+  uint32_t cmd;      // LC_BUILD_VERSION
+  uint32_t cmdsize;  // sizeof(struct build_version_command) +
+                     // ntools * sizeof(struct build_tool_version)
+  uint32_t platform; // platform
+  uint32_t minos;    // X.Y.Z is encoded in nibbles xxxx.yy.zz
+  uint32_t sdk;      // X.Y.Z is encoded in nibbles xxxx.yy.zz
+  uint32_t ntools;   // number of tool entries following this
+};
+
+struct dyld_info_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t rebase_off;
+  uint32_t rebase_size;
+  uint32_t bind_off;
+  uint32_t bind_size;
+  uint32_t weak_bind_off;
+  uint32_t weak_bind_size;
+  uint32_t lazy_bind_off;
+  uint32_t lazy_bind_size;
+  uint32_t export_off;
+  uint32_t export_size;
+};
+
+struct linker_option_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t count;
+};
+
+// The symseg_command is obsolete and no longer supported.
+struct symseg_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t offset;
+  uint32_t size;
+};
+
+// The ident_command is obsolete and no longer supported.
+struct ident_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+};
+
+// The fvmfile_command is obsolete and no longer supported.
+struct fvmfile_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint32_t name;
+  uint32_t header_addr;
+};
+
+struct tlv_descriptor_32 {
+  uint32_t thunk;
+  uint32_t key;
+  uint32_t offset;
+};
+
+struct tlv_descriptor_64 {
+  uint64_t thunk;
+  uint64_t key;
+  uint64_t offset;
+};
+
+struct tlv_descriptor {
+  uintptr_t thunk;
+  uintptr_t key;
+  uintptr_t offset;
+};
+
+struct entry_point_command {
+  uint32_t cmd;
+  uint32_t cmdsize;
+  uint64_t entryoff;
+  uint64_t stacksize;
+};
+
+// Structs from <mach-o/fat.h>
+struct fat_header {
+  uint32_t magic;
+  uint32_t nfat_arch;
+};
+
+struct fat_arch {
+  uint32_t cputype;
+  uint32_t cpusubtype;
+  uint32_t offset;
+  uint32_t size;
+  uint32_t align;
+};
+
+struct fat_arch_64 {
+  uint32_t cputype;
+  uint32_t cpusubtype;
+  uint64_t offset;
+  uint64_t size;
+  uint32_t align;
+  uint32_t reserved;
+};
+
+// Structs from <mach-o/reloc.h>
+struct relocation_info {
+  int32_t r_address;
+  uint32_t r_symbolnum : 24, r_pcrel : 1, r_length : 2, r_extern : 1,
+      r_type : 4;
+};
+
+struct scattered_relocation_info {
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
+  uint32_t r_scattered : 1, r_pcrel : 1, r_length : 2, r_type : 4,
+      r_address : 24;
+#else
+  uint32_t r_address : 24, r_type : 4, r_length : 2, r_pcrel : 1,
+      r_scattered : 1;
+#endif
+  int32_t r_value;
+};
+
+// Structs NOT from <mach-o/reloc.h>, but that make LLVM's life easier
+struct any_relocation_info {
+  uint32_t r_word0, r_word1;
+};
+
+// Structs from <mach-o/nlist.h>
+struct nlist_base {
+  uint32_t n_strx;
+  uint8_t n_type;
+  uint8_t n_sect;
+  uint16_t n_desc;
+};
+
+struct nlist {
+  uint32_t n_strx;
+  uint8_t n_type;
+  uint8_t n_sect;
+  int16_t n_desc;
+  uint32_t n_value;
+};
+
+struct nlist_64 {
+  uint32_t n_strx;
+  uint8_t n_type;
+  uint8_t n_sect;
+  uint16_t n_desc;
+  uint64_t n_value;
+};
+
+// Byte order swapping functions for MachO structs
+
+inline void swapStruct(fat_header &mh) {
+  sys::swapByteOrder(mh.magic);
+  sys::swapByteOrder(mh.nfat_arch);
+}
+
+inline void swapStruct(fat_arch &mh) {
+  sys::swapByteOrder(mh.cputype);
+  sys::swapByteOrder(mh.cpusubtype);
+  sys::swapByteOrder(mh.offset);
+  sys::swapByteOrder(mh.size);
+  sys::swapByteOrder(mh.align);
+}
+
+inline void swapStruct(fat_arch_64 &mh) {
+  sys::swapByteOrder(mh.cputype);
+  sys::swapByteOrder(mh.cpusubtype);
+  sys::swapByteOrder(mh.offset);
+  sys::swapByteOrder(mh.size);
+  sys::swapByteOrder(mh.align);
+  sys::swapByteOrder(mh.reserved);
+}
+
+inline void swapStruct(mach_header &mh) {
+  sys::swapByteOrder(mh.magic);
+  sys::swapByteOrder(mh.cputype);
+  sys::swapByteOrder(mh.cpusubtype);
+  sys::swapByteOrder(mh.filetype);
+  sys::swapByteOrder(mh.ncmds);
+  sys::swapByteOrder(mh.sizeofcmds);
+  sys::swapByteOrder(mh.flags);
+}
+
+inline void swapStruct(mach_header_64 &H) {
+  sys::swapByteOrder(H.magic);
+  sys::swapByteOrder(H.cputype);
+  sys::swapByteOrder(H.cpusubtype);
+  sys::swapByteOrder(H.filetype);
+  sys::swapByteOrder(H.ncmds);
+  sys::swapByteOrder(H.sizeofcmds);
+  sys::swapByteOrder(H.flags);
+  sys::swapByteOrder(H.reserved);
+}
+
+inline void swapStruct(load_command &lc) {
+  sys::swapByteOrder(lc.cmd);
+  sys::swapByteOrder(lc.cmdsize);
+}
+
+inline void swapStruct(symtab_command &lc) {
+  sys::swapByteOrder(lc.cmd);
+  sys::swapByteOrder(lc.cmdsize);
+  sys::swapByteOrder(lc.symoff);
+  sys::swapByteOrder(lc.nsyms);
+  sys::swapByteOrder(lc.stroff);
+  sys::swapByteOrder(lc.strsize);
+}
+
+inline void swapStruct(segment_command_64 &seg) {
+  sys::swapByteOrder(seg.cmd);
+  sys::swapByteOrder(seg.cmdsize);
+  sys::swapByteOrder(seg.vmaddr);
+  sys::swapByteOrder(seg.vmsize);
+  sys::swapByteOrder(seg.fileoff);
+  sys::swapByteOrder(seg.filesize);
+  sys::swapByteOrder(seg.maxprot);
+  sys::swapByteOrder(seg.initprot);
+  sys::swapByteOrder(seg.nsects);
+  sys::swapByteOrder(seg.flags);
+}
+
+inline void swapStruct(segment_command &seg) {
+  sys::swapByteOrder(seg.cmd);
+  sys::swapByteOrder(seg.cmdsize);
+  sys::swapByteOrder(seg.vmaddr);
+  sys::swapByteOrder(seg.vmsize);
+  sys::swapByteOrder(seg.fileoff);
+  sys::swapByteOrder(seg.filesize);
+  sys::swapByteOrder(seg.maxprot);
+  sys::swapByteOrder(seg.initprot);
+  sys::swapByteOrder(seg.nsects);
+  sys::swapByteOrder(seg.flags);
+}
+
+inline void swapStruct(section_64 &sect) {
+  sys::swapByteOrder(sect.addr);
+  sys::swapByteOrder(sect.size);
+  sys::swapByteOrder(sect.offset);
+  sys::swapByteOrder(sect.align);
+  sys::swapByteOrder(sect.reloff);
+  sys::swapByteOrder(sect.nreloc);
+  sys::swapByteOrder(sect.flags);
+  sys::swapByteOrder(sect.reserved1);
+  sys::swapByteOrder(sect.reserved2);
+}
+
+inline void swapStruct(section &sect) {
+  sys::swapByteOrder(sect.addr);
+  sys::swapByteOrder(sect.size);
+  sys::swapByteOrder(sect.offset);
+  sys::swapByteOrder(sect.align);
+  sys::swapByteOrder(sect.reloff);
+  sys::swapByteOrder(sect.nreloc);
+  sys::swapByteOrder(sect.flags);
+  sys::swapByteOrder(sect.reserved1);
+  sys::swapByteOrder(sect.reserved2);
+}
+
+inline void swapStruct(dyld_info_command &info) {
+  sys::swapByteOrder(info.cmd);
+  sys::swapByteOrder(info.cmdsize);
+  sys::swapByteOrder(info.rebase_off);
+  sys::swapByteOrder(info.rebase_size);
+  sys::swapByteOrder(info.bind_off);
+  sys::swapByteOrder(info.bind_size);
+  sys::swapByteOrder(info.weak_bind_off);
+  sys::swapByteOrder(info.weak_bind_size);
+  sys::swapByteOrder(info.lazy_bind_off);
+  sys::swapByteOrder(info.lazy_bind_size);
+  sys::swapByteOrder(info.export_off);
+  sys::swapByteOrder(info.export_size);
+}
+
+inline void swapStruct(dylib_command &d) {
+  sys::swapByteOrder(d.cmd);
+  sys::swapByteOrder(d.cmdsize);
+  sys::swapByteOrder(d.dylib.name);
+  sys::swapByteOrder(d.dylib.timestamp);
+  sys::swapByteOrder(d.dylib.current_version);
+  sys::swapByteOrder(d.dylib.compatibility_version);
+}
+
+inline void swapStruct(sub_framework_command &s) {
+  sys::swapByteOrder(s.cmd);
+  sys::swapByteOrder(s.cmdsize);
+  sys::swapByteOrder(s.umbrella);
+}
+
+inline void swapStruct(sub_umbrella_command &s) {
+  sys::swapByteOrder(s.cmd);
+  sys::swapByteOrder(s.cmdsize);
+  sys::swapByteOrder(s.sub_umbrella);
+}
+
+inline void swapStruct(sub_library_command &s) {
+  sys::swapByteOrder(s.cmd);
+  sys::swapByteOrder(s.cmdsize);
+  sys::swapByteOrder(s.sub_library);
+}
+
+inline void swapStruct(sub_client_command &s) {
+  sys::swapByteOrder(s.cmd);
+  sys::swapByteOrder(s.cmdsize);
+  sys::swapByteOrder(s.client);
+}
+
+inline void swapStruct(routines_command &r) {
+  sys::swapByteOrder(r.cmd);
+  sys::swapByteOrder(r.cmdsize);
+  sys::swapByteOrder(r.init_address);
+  sys::swapByteOrder(r.init_module);
+  sys::swapByteOrder(r.reserved1);
+  sys::swapByteOrder(r.reserved2);
+  sys::swapByteOrder(r.reserved3);
+  sys::swapByteOrder(r.reserved4);
+  sys::swapByteOrder(r.reserved5);
+  sys::swapByteOrder(r.reserved6);
+}
+
+inline void swapStruct(routines_command_64 &r) {
+  sys::swapByteOrder(r.cmd);
+  sys::swapByteOrder(r.cmdsize);
+  sys::swapByteOrder(r.init_address);
+  sys::swapByteOrder(r.init_module);
+  sys::swapByteOrder(r.reserved1);
+  sys::swapByteOrder(r.reserved2);
+  sys::swapByteOrder(r.reserved3);
+  sys::swapByteOrder(r.reserved4);
+  sys::swapByteOrder(r.reserved5);
+  sys::swapByteOrder(r.reserved6);
+}
+
+inline void swapStruct(thread_command &t) {
+  sys::swapByteOrder(t.cmd);
+  sys::swapByteOrder(t.cmdsize);
+}
+
+inline void swapStruct(dylinker_command &d) {
+  sys::swapByteOrder(d.cmd);
+  sys::swapByteOrder(d.cmdsize);
+  sys::swapByteOrder(d.name);
+}
+
+inline void swapStruct(uuid_command &u) {
+  sys::swapByteOrder(u.cmd);
+  sys::swapByteOrder(u.cmdsize);
+}
+
+inline void swapStruct(rpath_command &r) {
+  sys::swapByteOrder(r.cmd);
+  sys::swapByteOrder(r.cmdsize);
+  sys::swapByteOrder(r.path);
+}
+
+inline void swapStruct(source_version_command &s) {
+  sys::swapByteOrder(s.cmd);
+  sys::swapByteOrder(s.cmdsize);
+  sys::swapByteOrder(s.version);
+}
+
+inline void swapStruct(entry_point_command &e) {
+  sys::swapByteOrder(e.cmd);
+  sys::swapByteOrder(e.cmdsize);
+  sys::swapByteOrder(e.entryoff);
+  sys::swapByteOrder(e.stacksize);
+}
+
+inline void swapStruct(encryption_info_command &e) {
+  sys::swapByteOrder(e.cmd);
+  sys::swapByteOrder(e.cmdsize);
+  sys::swapByteOrder(e.cryptoff);
+  sys::swapByteOrder(e.cryptsize);
+  sys::swapByteOrder(e.cryptid);
+}
+
+inline void swapStruct(encryption_info_command_64 &e) {
+  sys::swapByteOrder(e.cmd);
+  sys::swapByteOrder(e.cmdsize);
+  sys::swapByteOrder(e.cryptoff);
+  sys::swapByteOrder(e.cryptsize);
+  sys::swapByteOrder(e.cryptid);
+  sys::swapByteOrder(e.pad);
+}
+
+inline void swapStruct(dysymtab_command &dst) {
+  sys::swapByteOrder(dst.cmd);
+  sys::swapByteOrder(dst.cmdsize);
+  sys::swapByteOrder(dst.ilocalsym);
+  sys::swapByteOrder(dst.nlocalsym);
+  sys::swapByteOrder(dst.iextdefsym);
+  sys::swapByteOrder(dst.nextdefsym);
+  sys::swapByteOrder(dst.iundefsym);
+  sys::swapByteOrder(dst.nundefsym);
+  sys::swapByteOrder(dst.tocoff);
+  sys::swapByteOrder(dst.ntoc);
+  sys::swapByteOrder(dst.modtaboff);
+  sys::swapByteOrder(dst.nmodtab);
+  sys::swapByteOrder(dst.extrefsymoff);
+  sys::swapByteOrder(dst.nextrefsyms);
+  sys::swapByteOrder(dst.indirectsymoff);
+  sys::swapByteOrder(dst.nindirectsyms);
+  sys::swapByteOrder(dst.extreloff);
+  sys::swapByteOrder(dst.nextrel);
+  sys::swapByteOrder(dst.locreloff);
+  sys::swapByteOrder(dst.nlocrel);
+}
+
+inline void swapStruct(any_relocation_info &reloc) {
+  sys::swapByteOrder(reloc.r_word0);
+  sys::swapByteOrder(reloc.r_word1);
+}
+
+inline void swapStruct(nlist_base &S) {
+  sys::swapByteOrder(S.n_strx);
+  sys::swapByteOrder(S.n_desc);
+}
+
+inline void swapStruct(nlist &sym) {
+  sys::swapByteOrder(sym.n_strx);
+  sys::swapByteOrder(sym.n_desc);
+  sys::swapByteOrder(sym.n_value);
+}
+
+inline void swapStruct(nlist_64 &sym) {
+  sys::swapByteOrder(sym.n_strx);
+  sys::swapByteOrder(sym.n_desc);
+  sys::swapByteOrder(sym.n_value);
+}
+
+inline void swapStruct(linkedit_data_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.dataoff);
+  sys::swapByteOrder(C.datasize);
+}
+
+inline void swapStruct(linker_option_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.count);
+}
+
+inline void swapStruct(version_min_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.version);
+  sys::swapByteOrder(C.sdk);
+}
+
+inline void swapStruct(note_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.offset);
+  sys::swapByteOrder(C.size);
+}
+
+inline void swapStruct(build_version_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.platform);
+  sys::swapByteOrder(C.minos);
+  sys::swapByteOrder(C.sdk);
+  sys::swapByteOrder(C.ntools);
+}
+
+inline void swapStruct(build_tool_version &C) {
+  sys::swapByteOrder(C.tool);
+  sys::swapByteOrder(C.version);
+}
+
+inline void swapStruct(data_in_code_entry &C) {
+  sys::swapByteOrder(C.offset);
+  sys::swapByteOrder(C.length);
+  sys::swapByteOrder(C.kind);
+}
+
+inline void swapStruct(uint32_t &C) { sys::swapByteOrder(C); }
+
+// The prebind_cksum_command is obsolete and no longer supported.
+inline void swapStruct(prebind_cksum_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.cksum);
+}
+
+// The twolevel_hints_command is obsolete and no longer supported.
+inline void swapStruct(twolevel_hints_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.offset);
+  sys::swapByteOrder(C.nhints);
+}
+
+// The prebound_dylib_command is obsolete and no longer supported.
+inline void swapStruct(prebound_dylib_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.name);
+  sys::swapByteOrder(C.nmodules);
+  sys::swapByteOrder(C.linked_modules);
+}
+
+// The fvmfile_command is obsolete and no longer supported.
+inline void swapStruct(fvmfile_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.name);
+  sys::swapByteOrder(C.header_addr);
+}
+
+// The symseg_command is obsolete and no longer supported.
+inline void swapStruct(symseg_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  sys::swapByteOrder(C.offset);
+  sys::swapByteOrder(C.size);
+}
+
+// The ident_command is obsolete and no longer supported.
+inline void swapStruct(ident_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+}
+
+inline void swapStruct(fvmlib &C) {
+  sys::swapByteOrder(C.name);
+  sys::swapByteOrder(C.minor_version);
+  sys::swapByteOrder(C.header_addr);
+}
+
+// The fvmlib_command is obsolete and no longer supported.
+inline void swapStruct(fvmlib_command &C) {
+  sys::swapByteOrder(C.cmd);
+  sys::swapByteOrder(C.cmdsize);
+  swapStruct(C.fvmlib);
+}
+
+// Get/Set functions from <mach-o/nlist.h>
+
+static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) {
+  return (((n_desc) >> 8u) & 0xffu);
+}
+
+static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) {
+  n_desc = (((n_desc)&0x00ff) | (((ordinal)&0xff) << 8));
+}
+
+static inline uint8_t GET_COMM_ALIGN(uint16_t n_desc) {
+  return (n_desc >> 8u) & 0x0fu;
+}
+
+static inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) {
+  n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u));
+}
+
+// Enums from <mach/machine.h>
+enum : uint32_t {
+  // Capability bits used in the definition of cpu_type.
+  CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits
+  CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI
+};
+
+// Constants for the cputype field.
+enum CPUType {
+  CPU_TYPE_ANY = -1,
+  CPU_TYPE_X86 = 7,
+  CPU_TYPE_I386 = CPU_TYPE_X86,
+  CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64,
+  /* CPU_TYPE_MIPS      = 8, */
+  CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC
+  CPU_TYPE_ARM = 12,
+  CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64,
+  CPU_TYPE_SPARC = 14,
+  CPU_TYPE_POWERPC = 18,
+  CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
+};
+
+enum : uint32_t {
+  // Capability bits used in the definition of cpusubtype.
+  CPU_SUBTYPE_MASK = 0xff000000,  // Mask for architecture bits
+  CPU_SUBTYPE_LIB64 = 0x80000000, // 64 bit libraries
+
+  // Special CPU subtype constants.
+  CPU_SUBTYPE_MULTIPLE = ~0u
+};
+
+// Constants for the cpusubtype field.
+enum CPUSubTypeX86 {
+  CPU_SUBTYPE_I386_ALL = 3,
+  CPU_SUBTYPE_386 = 3,
+  CPU_SUBTYPE_486 = 4,
+  CPU_SUBTYPE_486SX = 0x84,
+  CPU_SUBTYPE_586 = 5,
+  CPU_SUBTYPE_PENT = CPU_SUBTYPE_586,
+  CPU_SUBTYPE_PENTPRO = 0x16,
+  CPU_SUBTYPE_PENTII_M3 = 0x36,
+  CPU_SUBTYPE_PENTII_M5 = 0x56,
+  CPU_SUBTYPE_CELERON = 0x67,
+  CPU_SUBTYPE_CELERON_MOBILE = 0x77,
+  CPU_SUBTYPE_PENTIUM_3 = 0x08,
+  CPU_SUBTYPE_PENTIUM_3_M = 0x18,
+  CPU_SUBTYPE_PENTIUM_3_XEON = 0x28,
+  CPU_SUBTYPE_PENTIUM_M = 0x09,
+  CPU_SUBTYPE_PENTIUM_4 = 0x0a,
+  CPU_SUBTYPE_PENTIUM_4_M = 0x1a,
+  CPU_SUBTYPE_ITANIUM = 0x0b,
+  CPU_SUBTYPE_ITANIUM_2 = 0x1b,
+  CPU_SUBTYPE_XEON = 0x0c,
+  CPU_SUBTYPE_XEON_MP = 0x1c,
+
+  CPU_SUBTYPE_X86_ALL = 3,
+  CPU_SUBTYPE_X86_64_ALL = 3,
+  CPU_SUBTYPE_X86_ARCH1 = 4,
+  CPU_SUBTYPE_X86_64_H = 8
+};
+static inline int CPU_SUBTYPE_INTEL(int Family, int Model) {
+  return Family | (Model << 4);
+}
+static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) {
+  return ((int)ST) & 0x0f;
+}
+static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) {
+  return ((int)ST) >> 4;
+}
+enum { CPU_SUBTYPE_INTEL_FAMILY_MAX = 15, CPU_SUBTYPE_INTEL_MODEL_ALL = 0 };
+
+enum CPUSubTypeARM {
+  CPU_SUBTYPE_ARM_ALL = 0,
+  CPU_SUBTYPE_ARM_V4T = 5,
+  CPU_SUBTYPE_ARM_V6 = 6,
+  CPU_SUBTYPE_ARM_V5 = 7,
+  CPU_SUBTYPE_ARM_V5TEJ = 7,
+  CPU_SUBTYPE_ARM_XSCALE = 8,
+  CPU_SUBTYPE_ARM_V7 = 9,
+  //  unused  ARM_V7F     = 10,
+  CPU_SUBTYPE_ARM_V7S = 11,
+  CPU_SUBTYPE_ARM_V7K = 12,
+  CPU_SUBTYPE_ARM_V6M = 14,
+  CPU_SUBTYPE_ARM_V7M = 15,
+  CPU_SUBTYPE_ARM_V7EM = 16
+};
+
+enum CPUSubTypeARM64 { CPU_SUBTYPE_ARM64_ALL = 0 };
+
+enum CPUSubTypeSPARC { CPU_SUBTYPE_SPARC_ALL = 0 };
+
+enum CPUSubTypePowerPC {
+  CPU_SUBTYPE_POWERPC_ALL = 0,
+  CPU_SUBTYPE_POWERPC_601 = 1,
+  CPU_SUBTYPE_POWERPC_602 = 2,
+  CPU_SUBTYPE_POWERPC_603 = 3,
+  CPU_SUBTYPE_POWERPC_603e = 4,
+  CPU_SUBTYPE_POWERPC_603ev = 5,
+  CPU_SUBTYPE_POWERPC_604 = 6,
+  CPU_SUBTYPE_POWERPC_604e = 7,
+  CPU_SUBTYPE_POWERPC_620 = 8,
+  CPU_SUBTYPE_POWERPC_750 = 9,
+  CPU_SUBTYPE_POWERPC_7400 = 10,
+  CPU_SUBTYPE_POWERPC_7450 = 11,
+  CPU_SUBTYPE_POWERPC_970 = 100,
+
+  CPU_SUBTYPE_MC980000_ALL = CPU_SUBTYPE_POWERPC_ALL,
+  CPU_SUBTYPE_MC98601 = CPU_SUBTYPE_POWERPC_601
+};
+
+struct x86_thread_state32_t {
+  uint32_t eax;
+  uint32_t ebx;
+  uint32_t ecx;
+  uint32_t edx;
+  uint32_t edi;
+  uint32_t esi;
+  uint32_t ebp;
+  uint32_t esp;
+  uint32_t ss;
+  uint32_t eflags;
+  uint32_t eip;
+  uint32_t cs;
+  uint32_t ds;
+  uint32_t es;
+  uint32_t fs;
+  uint32_t gs;
+};
+
+struct x86_thread_state64_t {
+  uint64_t rax;
+  uint64_t rbx;
+  uint64_t rcx;
+  uint64_t rdx;
+  uint64_t rdi;
+  uint64_t rsi;
+  uint64_t rbp;
+  uint64_t rsp;
+  uint64_t r8;
+  uint64_t r9;
+  uint64_t r10;
+  uint64_t r11;
+  uint64_t r12;
+  uint64_t r13;
+  uint64_t r14;
+  uint64_t r15;
+  uint64_t rip;
+  uint64_t rflags;
+  uint64_t cs;
+  uint64_t fs;
+  uint64_t gs;
+};
+
+enum x86_fp_control_precis {
+  x86_FP_PREC_24B = 0,
+  x86_FP_PREC_53B = 2,
+  x86_FP_PREC_64B = 3
+};
+
+enum x86_fp_control_rc {
+  x86_FP_RND_NEAR = 0,
+  x86_FP_RND_DOWN = 1,
+  x86_FP_RND_UP = 2,
+  x86_FP_CHOP = 3
+};
+
+struct fp_control_t {
+  unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1,
+      precis : 1, : 2, pc : 2, rc : 2, : 1, : 3;
+};
+
+struct fp_status_t {
+  unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1,
+      precis : 1, stkflt : 1, errsumm : 1, c0 : 1, c1 : 1, c2 : 1, tos : 3,
+      c3 : 1, busy : 1;
+};
+
+struct mmst_reg_t {
+  char mmst_reg[10];
+  char mmst_rsrv[6];
+};
+
+struct xmm_reg_t {
+  char xmm_reg[16];
+};
+
+struct x86_float_state64_t {
+  int32_t fpu_reserved[2];
+  fp_control_t fpu_fcw;
+  fp_status_t fpu_fsw;
+  uint8_t fpu_ftw;
+  uint8_t fpu_rsrv1;
+  uint16_t fpu_fop;
+  uint32_t fpu_ip;
+  uint16_t fpu_cs;
+  uint16_t fpu_rsrv2;
+  uint32_t fpu_dp;
+  uint16_t fpu_ds;
+  uint16_t fpu_rsrv3;
+  uint32_t fpu_mxcsr;
+  uint32_t fpu_mxcsrmask;
+  mmst_reg_t fpu_stmm0;
+  mmst_reg_t fpu_stmm1;
+  mmst_reg_t fpu_stmm2;
+  mmst_reg_t fpu_stmm3;
+  mmst_reg_t fpu_stmm4;
+  mmst_reg_t fpu_stmm5;
+  mmst_reg_t fpu_stmm6;
+  mmst_reg_t fpu_stmm7;
+  xmm_reg_t fpu_xmm0;
+  xmm_reg_t fpu_xmm1;
+  xmm_reg_t fpu_xmm2;
+  xmm_reg_t fpu_xmm3;
+  xmm_reg_t fpu_xmm4;
+  xmm_reg_t fpu_xmm5;
+  xmm_reg_t fpu_xmm6;
+  xmm_reg_t fpu_xmm7;
+  xmm_reg_t fpu_xmm8;
+  xmm_reg_t fpu_xmm9;
+  xmm_reg_t fpu_xmm10;
+  xmm_reg_t fpu_xmm11;
+  xmm_reg_t fpu_xmm12;
+  xmm_reg_t fpu_xmm13;
+  xmm_reg_t fpu_xmm14;
+  xmm_reg_t fpu_xmm15;
+  char fpu_rsrv4[6 * 16];
+  uint32_t fpu_reserved1;
+};
+
+struct x86_exception_state64_t {
+  uint16_t trapno;
+  uint16_t cpu;
+  uint32_t err;
+  uint64_t faultvaddr;
+};
+
+inline void swapStruct(x86_thread_state32_t &x) {
+  sys::swapByteOrder(x.eax);
+  sys::swapByteOrder(x.ebx);
+  sys::swapByteOrder(x.ecx);
+  sys::swapByteOrder(x.edx);
+  sys::swapByteOrder(x.edi);
+  sys::swapByteOrder(x.esi);
+  sys::swapByteOrder(x.ebp);
+  sys::swapByteOrder(x.esp);
+  sys::swapByteOrder(x.ss);
+  sys::swapByteOrder(x.eflags);
+  sys::swapByteOrder(x.eip);
+  sys::swapByteOrder(x.cs);
+  sys::swapByteOrder(x.ds);
+  sys::swapByteOrder(x.es);
+  sys::swapByteOrder(x.fs);
+  sys::swapByteOrder(x.gs);
+}
+
+inline void swapStruct(x86_thread_state64_t &x) {
+  sys::swapByteOrder(x.rax);
+  sys::swapByteOrder(x.rbx);
+  sys::swapByteOrder(x.rcx);
+  sys::swapByteOrder(x.rdx);
+  sys::swapByteOrder(x.rdi);
+  sys::swapByteOrder(x.rsi);
+  sys::swapByteOrder(x.rbp);
+  sys::swapByteOrder(x.rsp);
+  sys::swapByteOrder(x.r8);
+  sys::swapByteOrder(x.r9);
+  sys::swapByteOrder(x.r10);
+  sys::swapByteOrder(x.r11);
+  sys::swapByteOrder(x.r12);
+  sys::swapByteOrder(x.r13);
+  sys::swapByteOrder(x.r14);
+  sys::swapByteOrder(x.r15);
+  sys::swapByteOrder(x.rip);
+  sys::swapByteOrder(x.rflags);
+  sys::swapByteOrder(x.cs);
+  sys::swapByteOrder(x.fs);
+  sys::swapByteOrder(x.gs);
+}
+
+inline void swapStruct(x86_float_state64_t &x) {
+  sys::swapByteOrder(x.fpu_reserved[0]);
+  sys::swapByteOrder(x.fpu_reserved[1]);
+  // TODO swap: fp_control_t fpu_fcw;
+  // TODO swap: fp_status_t fpu_fsw;
+  sys::swapByteOrder(x.fpu_fop);
+  sys::swapByteOrder(x.fpu_ip);
+  sys::swapByteOrder(x.fpu_cs);
+  sys::swapByteOrder(x.fpu_rsrv2);
+  sys::swapByteOrder(x.fpu_dp);
+  sys::swapByteOrder(x.fpu_ds);
+  sys::swapByteOrder(x.fpu_rsrv3);
+  sys::swapByteOrder(x.fpu_mxcsr);
+  sys::swapByteOrder(x.fpu_mxcsrmask);
+  sys::swapByteOrder(x.fpu_reserved1);
+}
+
+inline void swapStruct(x86_exception_state64_t &x) {
+  sys::swapByteOrder(x.trapno);
+  sys::swapByteOrder(x.cpu);
+  sys::swapByteOrder(x.err);
+  sys::swapByteOrder(x.faultvaddr);
+}
+
+struct x86_state_hdr_t {
+  uint32_t flavor;
+  uint32_t count;
+};
+
+struct x86_thread_state_t {
+  x86_state_hdr_t tsh;
+  union {
+    x86_thread_state64_t ts64;
+    x86_thread_state32_t ts32;
+  } uts;
+};
+
+struct x86_float_state_t {
+  x86_state_hdr_t fsh;
+  union {
+    x86_float_state64_t fs64;
+  } ufs;
+};
+
+struct x86_exception_state_t {
+  x86_state_hdr_t esh;
+  union {
+    x86_exception_state64_t es64;
+  } ues;
+};
+
+inline void swapStruct(x86_state_hdr_t &x) {
+  sys::swapByteOrder(x.flavor);
+  sys::swapByteOrder(x.count);
+}
+
+enum X86ThreadFlavors {
+  x86_THREAD_STATE32 = 1,
+  x86_FLOAT_STATE32 = 2,
+  x86_EXCEPTION_STATE32 = 3,
+  x86_THREAD_STATE64 = 4,
+  x86_FLOAT_STATE64 = 5,
+  x86_EXCEPTION_STATE64 = 6,
+  x86_THREAD_STATE = 7,
+  x86_FLOAT_STATE = 8,
+  x86_EXCEPTION_STATE = 9,
+  x86_DEBUG_STATE32 = 10,
+  x86_DEBUG_STATE64 = 11,
+  x86_DEBUG_STATE = 12
+};
+
+inline void swapStruct(x86_thread_state_t &x) {
+  swapStruct(x.tsh);
+  if (x.tsh.flavor == x86_THREAD_STATE64)
+    swapStruct(x.uts.ts64);
+}
+
+inline void swapStruct(x86_float_state_t &x) {
+  swapStruct(x.fsh);
+  if (x.fsh.flavor == x86_FLOAT_STATE64)
+    swapStruct(x.ufs.fs64);
+}
+
+inline void swapStruct(x86_exception_state_t &x) {
+  swapStruct(x.esh);
+  if (x.esh.flavor == x86_EXCEPTION_STATE64)
+    swapStruct(x.ues.es64);
+}
+
+const uint32_t x86_THREAD_STATE32_COUNT =
+    sizeof(x86_thread_state32_t) / sizeof(uint32_t);
+
+const uint32_t x86_THREAD_STATE64_COUNT =
+    sizeof(x86_thread_state64_t) / sizeof(uint32_t);
+const uint32_t x86_FLOAT_STATE64_COUNT =
+    sizeof(x86_float_state64_t) / sizeof(uint32_t);
+const uint32_t x86_EXCEPTION_STATE64_COUNT =
+    sizeof(x86_exception_state64_t) / sizeof(uint32_t);
+
+const uint32_t x86_THREAD_STATE_COUNT =
+    sizeof(x86_thread_state_t) / sizeof(uint32_t);
+const uint32_t x86_FLOAT_STATE_COUNT =
+    sizeof(x86_float_state_t) / sizeof(uint32_t);
+const uint32_t x86_EXCEPTION_STATE_COUNT =
+    sizeof(x86_exception_state_t) / sizeof(uint32_t);
+
+struct arm_thread_state32_t {
+  uint32_t r[13];
+  uint32_t sp;
+  uint32_t lr;
+  uint32_t pc;
+  uint32_t cpsr;
+};
+
+inline void swapStruct(arm_thread_state32_t &x) {
+  for (int i = 0; i < 13; i++)
+    sys::swapByteOrder(x.r[i]);
+  sys::swapByteOrder(x.sp);
+  sys::swapByteOrder(x.lr);
+  sys::swapByteOrder(x.pc);
+  sys::swapByteOrder(x.cpsr);
+}
+
+struct arm_thread_state64_t {
+  uint64_t x[29];
+  uint64_t fp;
+  uint64_t lr;
+  uint64_t sp;
+  uint64_t pc;
+  uint32_t cpsr;
+  uint32_t pad;
+};
+
+inline void swapStruct(arm_thread_state64_t &x) {
+  for (int i = 0; i < 29; i++)
+    sys::swapByteOrder(x.x[i]);
+  sys::swapByteOrder(x.fp);
+  sys::swapByteOrder(x.lr);
+  sys::swapByteOrder(x.sp);
+  sys::swapByteOrder(x.pc);
+  sys::swapByteOrder(x.cpsr);
+}
+
+struct arm_state_hdr_t {
+  uint32_t flavor;
+  uint32_t count;
+};
+
+struct arm_thread_state_t {
+  arm_state_hdr_t tsh;
+  union {
+    arm_thread_state32_t ts32;
+  } uts;
+};
+
+inline void swapStruct(arm_state_hdr_t &x) {
+  sys::swapByteOrder(x.flavor);
+  sys::swapByteOrder(x.count);
+}
+
+enum ARMThreadFlavors {
+  ARM_THREAD_STATE = 1,
+  ARM_VFP_STATE = 2,
+  ARM_EXCEPTION_STATE = 3,
+  ARM_DEBUG_STATE = 4,
+  ARN_THREAD_STATE_NONE = 5,
+  ARM_THREAD_STATE64 = 6,
+  ARM_EXCEPTION_STATE64 = 7
+};
+
+inline void swapStruct(arm_thread_state_t &x) {
+  swapStruct(x.tsh);
+  if (x.tsh.flavor == ARM_THREAD_STATE)
+    swapStruct(x.uts.ts32);
+}
+
+const uint32_t ARM_THREAD_STATE_COUNT =
+    sizeof(arm_thread_state32_t) / sizeof(uint32_t);
+
+const uint32_t ARM_THREAD_STATE64_COUNT =
+    sizeof(arm_thread_state64_t) / sizeof(uint32_t);
+
+struct ppc_thread_state32_t {
+  uint32_t srr0;
+  uint32_t srr1;
+  uint32_t r0;
+  uint32_t r1;
+  uint32_t r2;
+  uint32_t r3;
+  uint32_t r4;
+  uint32_t r5;
+  uint32_t r6;
+  uint32_t r7;
+  uint32_t r8;
+  uint32_t r9;
+  uint32_t r10;
+  uint32_t r11;
+  uint32_t r12;
+  uint32_t r13;
+  uint32_t r14;
+  uint32_t r15;
+  uint32_t r16;
+  uint32_t r17;
+  uint32_t r18;
+  uint32_t r19;
+  uint32_t r20;
+  uint32_t r21;
+  uint32_t r22;
+  uint32_t r23;
+  uint32_t r24;
+  uint32_t r25;
+  uint32_t r26;
+  uint32_t r27;
+  uint32_t r28;
+  uint32_t r29;
+  uint32_t r30;
+  uint32_t r31;
+  uint32_t ct;
+  uint32_t xer;
+  uint32_t lr;
+  uint32_t ctr;
+  uint32_t mq;
+  uint32_t vrsave;
+};
+
+inline void swapStruct(ppc_thread_state32_t &x) {
+  sys::swapByteOrder(x.srr0);
+  sys::swapByteOrder(x.srr1);
+  sys::swapByteOrder(x.r0);
+  sys::swapByteOrder(x.r1);
+  sys::swapByteOrder(x.r2);
+  sys::swapByteOrder(x.r3);
+  sys::swapByteOrder(x.r4);
+  sys::swapByteOrder(x.r5);
+  sys::swapByteOrder(x.r6);
+  sys::swapByteOrder(x.r7);
+  sys::swapByteOrder(x.r8);
+  sys::swapByteOrder(x.r9);
+  sys::swapByteOrder(x.r10);
+  sys::swapByteOrder(x.r11);
+  sys::swapByteOrder(x.r12);
+  sys::swapByteOrder(x.r13);
+  sys::swapByteOrder(x.r14);
+  sys::swapByteOrder(x.r15);
+  sys::swapByteOrder(x.r16);
+  sys::swapByteOrder(x.r17);
+  sys::swapByteOrder(x.r18);
+  sys::swapByteOrder(x.r19);
+  sys::swapByteOrder(x.r20);
+  sys::swapByteOrder(x.r21);
+  sys::swapByteOrder(x.r22);
+  sys::swapByteOrder(x.r23);
+  sys::swapByteOrder(x.r24);
+  sys::swapByteOrder(x.r25);
+  sys::swapByteOrder(x.r26);
+  sys::swapByteOrder(x.r27);
+  sys::swapByteOrder(x.r28);
+  sys::swapByteOrder(x.r29);
+  sys::swapByteOrder(x.r30);
+  sys::swapByteOrder(x.r31);
+  sys::swapByteOrder(x.ct);
+  sys::swapByteOrder(x.xer);
+  sys::swapByteOrder(x.lr);
+  sys::swapByteOrder(x.ctr);
+  sys::swapByteOrder(x.mq);
+  sys::swapByteOrder(x.vrsave);
+}
+
+struct ppc_state_hdr_t {
+  uint32_t flavor;
+  uint32_t count;
+};
+
+struct ppc_thread_state_t {
+  ppc_state_hdr_t tsh;
+  union {
+    ppc_thread_state32_t ts32;
+  } uts;
+};
+
+inline void swapStruct(ppc_state_hdr_t &x) {
+  sys::swapByteOrder(x.flavor);
+  sys::swapByteOrder(x.count);
+}
+
+enum PPCThreadFlavors {
+  PPC_THREAD_STATE = 1,
+  PPC_FLOAT_STATE = 2,
+  PPC_EXCEPTION_STATE = 3,
+  PPC_VECTOR_STATE = 4,
+  PPC_THREAD_STATE64 = 5,
+  PPC_EXCEPTION_STATE64 = 6,
+  PPC_THREAD_STATE_NONE = 7
+};
+
+inline void swapStruct(ppc_thread_state_t &x) {
+  swapStruct(x.tsh);
+  if (x.tsh.flavor == PPC_THREAD_STATE)
+    swapStruct(x.uts.ts32);
+}
+
+const uint32_t PPC_THREAD_STATE_COUNT =
+    sizeof(ppc_thread_state32_t) / sizeof(uint32_t);
+
+// Define a union of all load command structs
+#define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data;
+
+union macho_load_command {
+#include "llvm/BinaryFormat/MachO.def"
+};
+
+} // end namespace MachO
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h
new file mode 100644
index 000000000000..c0e23db5e1ae
--- /dev/null
+++ b/include/llvm/BinaryFormat/Magic.h
@@ -0,0 +1,73 @@
+//===- llvm/BinaryFormat/Magic.h - File magic identification ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MAGIC_H
+#define LLVM_BINARYFORMAT_MAGIC_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+
+#include <system_error>
+
+namespace llvm {
+/// file_magic - An "enum class" enumeration of file types based on magic (the
+/// first N bytes of the file).
+struct file_magic {
+  enum Impl {
+    unknown = 0,       ///< Unrecognized file
+    bitcode,           ///< Bitcode file
+    archive,           ///< ar style archive file
+    elf,               ///< ELF Unknown type
+    elf_relocatable,   ///< ELF Relocatable object file
+    elf_executable,    ///< ELF Executable image
+    elf_shared_object, ///< ELF dynamically linked shared lib
+    elf_core,          ///< ELF core image
+    macho_object,      ///< Mach-O Object file
+    macho_executable,  ///< Mach-O Executable
+    macho_fixed_virtual_memory_shared_lib,    ///< Mach-O Shared Lib, FVM
+    macho_core,                               ///< Mach-O Core File
+    macho_preload_executable,                 ///< Mach-O Preloaded Executable
+    macho_dynamically_linked_shared_lib,      ///< Mach-O dynlinked shared lib
+    macho_dynamic_linker,                     ///< The Mach-O dynamic linker
+    macho_bundle,                             ///< Mach-O Bundle file
+    macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub
+    macho_dsym_companion,                     ///< Mach-O dSYM companion file
+    macho_kext_bundle,                        ///< Mach-O kext bundle file
+    macho_universal_binary,                   ///< Mach-O universal binary
+    coff_cl_gl_object,   ///< Microsoft cl.exe's intermediate code file
+    coff_object,         ///< COFF object file
+    coff_import_library, ///< COFF import library
+    pecoff_executable,   ///< PECOFF executable file
+    windows_resource,    ///< Windows compiled resource file (.res)
+    wasm_object          ///< WebAssembly Object file
+  };
+
+  bool is_object() const { return V != unknown; }
+
+  file_magic() = default;
+  file_magic(Impl V) : V(V) {}
+  operator Impl() const { return V; }
+
+private:
+  Impl V = unknown;
+};
+
+/// @brief Identify the type of a binary file from its magic (leading bytes).
+file_magic identify_magic(StringRef magic);
+
+/// @brief Get and identify \a path's type based on its content.
+///
+/// @param path Input path.
+/// @param result Set to the type of file, or file_magic::unknown.
+/// @returns errc::success if result has been successfully set, otherwise a
+///          platform-specific error_code.
+std::error_code identify_magic(const Twine &path, file_magic &result);
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Support/Wasm.h b/include/llvm/BinaryFormat/Wasm.h
similarity index 80%
rename from include/llvm/Support/Wasm.h
rename to include/llvm/BinaryFormat/Wasm.h
index e3831827062c..fcd8ad957040 100644
--- a/include/llvm/Support/Wasm.h
+++ b/include/llvm/BinaryFormat/Wasm.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_SUPPORT_WASM_H
-#define LLVM_SUPPORT_WASM_H
+#ifndef LLVM_BINARYFORMAT_WASM_H
+#define LLVM_BINARYFORMAT_WASM_H
 
 #include "llvm/ADT/ArrayRef.h"
 
@@ -106,10 +106,10 @@ struct WasmElemSegment {
 };
 
 struct WasmRelocation {
-  uint32_t Type;         // The type of the relocation.
-  int32_t Index;         // Index into function to global index space.
-  uint64_t Offset;       // Offset from the start of the section.
-  int64_t Addend;        // A value to add to the symbol.
+  uint32_t Type;   // The type of the relocation.
+  int32_t Index;   // Index into function to global index space.
+  uint64_t Offset; // Offset from the start of the section.
+  int64_t Addend;  // A value to add to the symbol.
 };
 
 enum : unsigned {
@@ -129,36 +129,36 @@ enum : unsigned {
 
 // Type immediate encodings used in various contexts.
 enum {
-  WASM_TYPE_I32          = -0x01,
-  WASM_TYPE_I64          = -0x02,
-  WASM_TYPE_F32          = -0x03,
-  WASM_TYPE_F64          = -0x04,
-  WASM_TYPE_ANYFUNC      = -0x10,
-  WASM_TYPE_FUNC         = -0x20,
-  WASM_TYPE_NORESULT     = -0x40, // for blocks with no result values
+  WASM_TYPE_I32 = -0x01,
+  WASM_TYPE_I64 = -0x02,
+  WASM_TYPE_F32 = -0x03,
+  WASM_TYPE_F64 = -0x04,
+  WASM_TYPE_ANYFUNC = -0x10,
+  WASM_TYPE_FUNC = -0x20,
+  WASM_TYPE_NORESULT = -0x40, // for blocks with no result values
 };
 
 // Kinds of externals (for imports and exports).
 enum : unsigned {
   WASM_EXTERNAL_FUNCTION = 0x0,
-  WASM_EXTERNAL_TABLE    = 0x1,
-  WASM_EXTERNAL_MEMORY   = 0x2,
-  WASM_EXTERNAL_GLOBAL   = 0x3,
+  WASM_EXTERNAL_TABLE = 0x1,
+  WASM_EXTERNAL_MEMORY = 0x2,
+  WASM_EXTERNAL_GLOBAL = 0x3,
 };
 
 // Opcodes used in initializer expressions.
 enum : unsigned {
-  WASM_OPCODE_END        = 0x0b,
+  WASM_OPCODE_END = 0x0b,
   WASM_OPCODE_GET_GLOBAL = 0x23,
-  WASM_OPCODE_I32_CONST  = 0x41,
-  WASM_OPCODE_I64_CONST  = 0x42,
-  WASM_OPCODE_F32_CONST  = 0x43,
-  WASM_OPCODE_F64_CONST  = 0x44,
+  WASM_OPCODE_I32_CONST = 0x41,
+  WASM_OPCODE_I64_CONST = 0x42,
+  WASM_OPCODE_F32_CONST = 0x43,
+  WASM_OPCODE_F64_CONST = 0x44,
 };
 
 enum : unsigned {
-  WASM_NAMES_FUNCTION    = 0x1,
-  WASM_NAMES_LOCAL       = 0x2,
+  WASM_NAMES_FUNCTION = 0x1,
+  WASM_NAMES_LOCAL = 0x2,
 };
 
 enum : unsigned {
diff --git a/include/llvm/Support/WasmRelocs/WebAssembly.def b/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def
similarity index 100%
rename from include/llvm/Support/WasmRelocs/WebAssembly.def
rename to include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def
diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h
index 31ffb7645f3a..61e4f6351b19 100644
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@@ -40,6 +40,8 @@ namespace llvm {
     return std::move(*Val);
   }
 
+  struct BitcodeFileContents;
+
   /// Represents a module in a bitcode file.
   class BitcodeModule {
     // This covers the identification (if present) and module blocks.
@@ -61,8 +63,8 @@ namespace llvm {
           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
 
     // Calls the ctor.
-    friend Expected<std::vector<BitcodeModule>>
-    getBitcodeModuleList(MemoryBufferRef Buffer);
+    friend Expected<BitcodeFileContents>
+    getBitcodeFileContents(MemoryBufferRef Buffer);
 
     Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
                                                     bool MaterializeAll,
@@ -99,6 +101,13 @@ namespace llvm {
     Error readSummary(ModuleSummaryIndex &CombinedIndex, unsigned ModuleId);
   };
 
+  struct BitcodeFileContents {
+    std::vector<BitcodeModule> Mods;
+  };
+
+  /// Returns the contents of a bitcode file.
+  Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
+
   /// Returns a list of modules in the specified bitcode buffer.
   Expected<std::vector<BitcodeModule>>
   getBitcodeModuleList(MemoryBufferRef Buffer);
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 8ee1e4b583b6..a643bfd1dcea 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -55,6 +55,8 @@ enum BlockIDs {
   METADATA_KIND_BLOCK_ID,
 
   STRTAB_BLOCK_ID,
+
+  FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID,
 };
 
 /// Identification block contains a string that describes the producer details,
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index 32542fa87463..9e33df6b55ec 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -17,11 +17,11 @@
 #define LLVM_CODEGEN_BASICTTIIMPL_H
 
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfoImpl.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 
 namespace llvm {
 
@@ -117,6 +117,10 @@ public:
     return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
   }
 
+  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
+    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
+  }
+
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                            bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
     TargetLoweringBase::AddrMode AM;
@@ -1080,46 +1084,46 @@ public:
     return 0; 
   }
 
+  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
+  /// We're assuming that reduction operations are performed the following way:
+  /// 1. Non-pairwise reduction
+  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
+  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
+  ///            \----------------v-------------/  \----------v------------/
+  ///                            n/2 elements               n/2 elements
+  /// %red1 = op <n x t> %val, <n x t> %val1
+  /// After this operation we have a vector %red1 where only the first n/2
+  /// elements are meaningful, the second n/2 elements are undefined and can be
+  /// dropped. All other operations are actually working with the vector of
+  /// length n/2, not n, though the real vector length is still n.
+  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
+  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
+  ///            \----------------v-------------/  \----------v------------/
+  ///                            n/4 elements               3*n/4 elements
+  /// %red2 = op <n x t> %red1, <n x t> %val2  - working with the vector of
+  /// length n/2, the resulting vector has length n/4 etc.
+  /// 2. Pairwise reduction:
+  /// Everything is the same except for an additional shuffle operation which
+  /// is used to produce operands for pairwise kind of reductions.
+  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
+  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
+  ///            \-------------v----------/  \----------v------------/
+  ///                   n/2 elements               n/2 elements
+  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
+  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
+  ///            \-------------v----------/  \----------v------------/
+  ///                   n/2 elements               n/2 elements
+  /// %red1 = op <n x t> %val1, <n x t> %val2
+  /// Again, the operation is performed on <n x t> vector, but the resulting
+  /// vector %red1 is <n/2 x t> vector.
+  ///
+  /// The cost model should take into account that the actual length of the
+  /// vector is reduced on each iteration.
   unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
     assert(Ty->isVectorTy() && "Expect a vector type");
     Type *ScalarTy = Ty->getVectorElementType();
     unsigned NumVecElts = Ty->getVectorNumElements();
     unsigned NumReduxLevels = Log2_32(NumVecElts);
-    // Try to calculate arithmetic and shuffle op costs for reduction operations.
-    // We're assuming that reduction operation are performing the following way:
-    // 1. Non-pairwise reduction
-    // %val1 = shufflevector<n x t> %val, <n x t> %undef,
-    // <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
-    //            \----------------v-------------/  \----------v------------/
-    //                            n/2 elements               n/2 elements
-    // %red1 = op <n x t> %val, <n x t> val1
-    // After this operation we have a vector %red1 with only maningfull the
-    // first n/2 elements, the second n/2 elements are undefined and can be
-    // dropped. All other operations are actually working with the vector of
-    // length n/2, not n. though the real vector length is still n.
-    // %val2 = shufflevector<n x t> %red1, <n x t> %undef,
-    // <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
-    //            \----------------v-------------/  \----------v------------/
-    //                            n/4 elements               3*n/4 elements
-    // %red2 = op <n x t> %red1, <n x t> val2  - working with the vector of
-    // length n/2, the resulting vector has length n/4 etc.
-    // 2. Pairwise reduction:
-    // Everything is the same except for an additional shuffle operation which
-    // is used to produce operands for pairwise kind of reductions.
-    // %val1 = shufflevector<n x t> %val, <n x t> %undef,
-    // <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
-    //            \-------------v----------/  \----------v------------/
-    //                   n/2 elements               n/2 elements
-    // %val2 = shufflevector<n x t> %val, <n x t> %undef,
-    // <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
-    //            \-------------v----------/  \----------v------------/
-    //                   n/2 elements               n/2 elements
-    // %red1 = op <n x t> %val1, <n x t> val2
-    // Again, the operation is performed on <n x t> vector, but the resulting
-    // vector %red1 is <n/2 x t> vector.
-    //
-    // The cost model should take into account that the actual length of the
-    // vector is reduced on each iteration.
     unsigned ArithCost = 0;
     unsigned ShuffleCost = 0;
     auto *ConcreteTTI = static_cast<T *>(this);
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index 8de140e91bf3..77c37ac7abea 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-=====//
+//===- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -29,17 +29,22 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include <cstdint>
 #include <map>
+#include <memory>
+#include <utility>
+#include <vector>
 
 namespace llvm {
 
-class MCInstrDesc;
+class DefaultVLIWScheduler;
+class InstrItineraryData;
+class MachineFunction;
 class MachineInstr;
 class MachineLoopInfo;
-class MachineDominatorTree;
-class InstrItineraryData;
-class DefaultVLIWScheduler;
+class MCInstrDesc;
 class SUnit;
+class TargetInstrInfo;
 
 // --------------------------------------------------------------------
 // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
@@ -64,17 +69,18 @@ class SUnit;
 #define DFA_MAX_RESTERMS        4   // The max # of AND'ed resource terms.
 #define DFA_MAX_RESOURCES       16  // The max # of resource bits in one term.
 
-typedef uint64_t                DFAInput;
-typedef int64_t                 DFAStateInput;
+using DFAInput = uint64_t;
+using DFAStateInput = int64_t;
+
 #define DFA_TBLTYPE             "int64_t" // For generating DFAStateInputTable.
 // --------------------------------------------------------------------
 
 class DFAPacketizer {
 private:
-  typedef std::pair<unsigned, DFAInput> UnsignPair;
+  using UnsignPair = std::pair<unsigned, DFAInput>;
 
   const InstrItineraryData *InstrItins;
-  int CurrentState;
+  int CurrentState = 0;
   const DFAStateInput (*DFAStateInputTable)[2];
   const unsigned *DFAStateEntryTable;
 
@@ -101,24 +107,23 @@ public:
 
   // Check if the resources occupied by a MCInstrDesc are available in
   // the current state.
-  bool canReserveResources(const llvm::MCInstrDesc *MID);
+  bool canReserveResources(const MCInstrDesc *MID);
 
   // Reserve the resources occupied by a MCInstrDesc and change the current
   // state to reflect that change.
-  void reserveResources(const llvm::MCInstrDesc *MID);
+  void reserveResources(const MCInstrDesc *MID);
 
   // Check if the resources occupied by a machine instruction are available
   // in the current state.
-  bool canReserveResources(llvm::MachineInstr &MI);
+  bool canReserveResources(MachineInstr &MI);
 
   // Reserve the resources occupied by a machine instruction and change the
   // current state to reflect that change.
-  void reserveResources(llvm::MachineInstr &MI);
+  void reserveResources(MachineInstr &MI);
 
   const InstrItineraryData *getInstrItins() const { return InstrItins; }
 };
 
-
 // VLIWPacketizerList implements a simple VLIW packetizer using DFA. The
 // packetizer works on machine basic blocks. For each instruction I in BB,
 // the packetizer consults the DFA to see if machine resources are available
@@ -205,6 +210,6 @@ public:
   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation);
 };
 
-} // namespace llvm
+} // end namespace llvm
 
-#endif
+#endif // LLVM_CODEGEN_DFAPACKETIZER_H
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 4f47ba6e3852..5ed5faa2c415 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -21,10 +21,10 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/Dwarf.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
diff --git a/include/llvm/CodeGen/ExecutionDepsFix.h b/include/llvm/CodeGen/ExecutionDepsFix.h
index 1d5b9684e105..f4db8b7322da 100644
--- a/include/llvm/CodeGen/ExecutionDepsFix.h
+++ b/include/llvm/CodeGen/ExecutionDepsFix.h
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-=//
+//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -20,19 +20,30 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H
 #define LLVM_CODEGEN_EXECUTIONDEPSFIX_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <limits>
+#include <utility>
 #include <vector>
 
 namespace llvm {
 
+class MachineBasicBlock;
+class MachineInstr;
+class TargetInstrInfo;
+
 /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
 /// of execution domains.
 ///
@@ -50,7 +61,7 @@ namespace llvm {
 /// domains.
 struct DomainValue {
   // Basic reference counting.
-  unsigned Refs;
+  unsigned Refs = 0;
 
   // Bitmask of available domains. For an open DomainValue, it is the still
   // possible domains for collapsing. For a collapsed DomainValue it is the
@@ -65,6 +76,8 @@ struct DomainValue {
   // Twiddleable instructions using or defining these registers.
   SmallVector<MachineInstr*, 8> Instrs;
 
+  DomainValue() { clear(); }
+
   // A collapsed DomainValue has no instructions to twiddle - it simply keeps
   // track of the domains where the registers are already available.
   bool isCollapsed() const { return Instrs.empty(); }
@@ -97,8 +110,6 @@ struct DomainValue {
     return countTrailingZeros(AvailableDomains);
   }
 
-  DomainValue() : Refs(0) { clear(); }
-
   // Clear this DomainValue and point to next which has all its data.
   void clear() {
     AvailableDomains = 0;
@@ -136,29 +147,27 @@ class ExecutionDepsFix : public MachineFunctionPass {
     // Keeps clearance and domain information for all registers. Note that this
     // is different from the usual definition notion of liveness. The CPU
     // doesn't care whether or not we consider a register killed.
-    LiveReg *OutRegs;
+    LiveReg *OutRegs = nullptr;
 
     // Whether we have gotten to this block in primary processing yet.
-    bool PrimaryCompleted;
+    bool PrimaryCompleted = false;
 
     // The number of predecessors for which primary processing has completed
-    unsigned IncomingProcessed;
+    unsigned IncomingProcessed = 0;
 
     // The value of `IncomingProcessed` at the start of primary processing
-    unsigned PrimaryIncoming;
+    unsigned PrimaryIncoming = 0;
 
     // The number of predecessors for which all processing steps are done.
-    unsigned IncomingCompleted;
+    unsigned IncomingCompleted = 0;
 
-    MBBInfo()
-        : OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0),
-          PrimaryIncoming(0), IncomingCompleted(0) {}
+    MBBInfo() = default;
   };
-  typedef DenseMap<MachineBasicBlock *, MBBInfo> MBBInfoMap;
+  using MBBInfoMap = DenseMap<MachineBasicBlock *, MBBInfo>;
   MBBInfoMap MBBInfos;
 
   /// List of undefined register reads in this block in forward order.
-  std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
+  std::vector<std::pair<MachineInstr *, unsigned>> UndefReads;
 
   /// Storage for register unit liveness.
   LivePhysRegs LiveRegSet;
@@ -166,6 +175,7 @@ class ExecutionDepsFix : public MachineFunctionPass {
   /// Current instruction number.
   /// The first instruction in each basic block is 0.
   int CurInstr;
+
 public:
   ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC)
     : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {}
@@ -217,4 +227,4 @@ private:
 
 } // end namepsace llvm
 
-#endif
+#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 57fa0c73d272..74e4179e73e9 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -17,11 +17,12 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstrTypes.h"
@@ -30,19 +31,43 @@
 #include <algorithm>
 #include <cstdint>
 #include <utility>
-#include <vector>
 
 namespace llvm {
 
+class AllocaInst;
+class BasicBlock;
+class CallInst;
+class Constant;
+class ConstantFP;
+class DataLayout;
+class FunctionLoweringInfo;
+class LoadInst;
 class MachineConstantPool;
+class MachineFrameInfo;
+class MachineFunction;
+class MachineInstr;
+class MachineMemOperand;
+class MachineOperand;
+class MachineRegisterInfo;
+class MCContext;
+class MCInstrDesc;
+class MCSymbol;
+class TargetInstrInfo;
+class TargetLibraryInfo;
+class TargetMachine;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+class Type;
+class User;
+class Value;
 
 /// \brief This is a fast-path instruction selection class that generates poor
 /// code and doesn't support illegal types or non-trivial lowering, but runs
 /// quickly.
 class FastISel {
 public:
-  typedef TargetLoweringBase::ArgListEntry ArgListEntry;
-  typedef TargetLoweringBase::ArgListTy ArgListTy;
+  using ArgListEntry = TargetLoweringBase::ArgListEntry;
+  using ArgListTy = TargetLoweringBase::ArgListTy;
   struct CallLoweringInfo {
     Type *RetTy = nullptr;
     bool RetSExt : 1;
@@ -202,6 +227,8 @@ protected:
   MachineInstr *EmitStartPt;
 
 public:
+  virtual ~FastISel();
+
   /// \brief Return the position of the last instruction emitted for
   /// materializing constants for use in the current block.
   MachineInstr *getLastLocalValue() { return LastLocalValue; }
@@ -293,8 +320,6 @@ public:
   /// \brief Reset InsertPt to the given old insert position.
   void leaveLocalValueArea(SavePoint Old);
 
-  virtual ~FastISel();
-
 protected:
   explicit FastISel(FunctionLoweringInfo &FuncInfo,
                     const TargetLibraryInfo *LibInfo,
@@ -334,7 +359,7 @@ protected:
 
   /// \brief This method is called by target-independent code to request that an
   /// instruction with the given type, opcode, and register and immediate
-  // operands be emitted.
+  /// operands be emitted.
   virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0,
                                bool Op0IsKill, uint64_t Imm);
 
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index e7544bd7b70c..7d7c3e8cfd22 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -1,4 +1,4 @@
-//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
+//===- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen ---===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -23,29 +23,28 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <utility>
 #include <vector>
 
 namespace llvm {
 
-class AllocaInst;
+class Argument;
 class BasicBlock;
 class BranchProbabilityInfo;
 class Function;
-class GlobalVariable;
 class Instruction;
-class MachineInstr;
-class MachineBasicBlock;
 class MachineFunction;
-class MachineModuleInfo;
+class MachineInstr;
 class MachineRegisterInfo;
-class SelectionDAG;
 class MVT;
+class SelectionDAG;
 class TargetLowering;
-class Value;
 
 //===--------------------------------------------------------------------===//
 /// FunctionLoweringInfo - This contains information that is global to a
@@ -74,25 +73,24 @@ public:
 
   /// A map from swifterror value in a basic block to the virtual register it is
   /// currently represented by.
-  llvm::DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
+  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
       SwiftErrorVRegDefMap;
 
   /// A list of upward exposed vreg uses that need to be satisfied by either a
   /// copy def or a phi node at the beginning of the basic block representing
   /// the predecessor(s) swifterror value.
-  llvm::DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
+  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
       SwiftErrorVRegUpwardsUse;
 
   /// The swifterror argument of the current function.
   const Value *SwiftErrorArg;
 
-  typedef SmallVector<const Value*, 1> SwiftErrorValues;
+  using SwiftErrorValues = SmallVector<const Value*, 1>;
   /// A function can only have a single swifterror argument. And if it does
   /// have a swifterror argument, it must be the first entry in
   /// SwiftErrorVals.
   SwiftErrorValues SwiftErrorVals;
 
-
   /// Get or create the swifterror value virtual register in
   /// SwiftErrorVRegDefMap for this basic block.
   unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *,
@@ -118,7 +116,7 @@ public:
   /// slot), and we track that here.
 
   struct StatepointSpillMap {
-    typedef DenseMap<const Value *, Optional<int>> SlotMapTy;
+    using SlotMapTy = DenseMap<const Value *, Optional<int>>;
 
     /// Maps uniqued llvm IR values to the slots they were spilled in.  If a
     /// value is mapped to None it means we visited the value but didn't spill
@@ -172,8 +170,9 @@ public:
   struct LiveOutInfo {
     unsigned NumSignBits : 31;
     unsigned IsValid : 1;
-    KnownBits Known;
-    LiveOutInfo() : NumSignBits(0), IsValid(true), Known(1) {}
+    KnownBits Known = 1;
+
+    LiveOutInfo() : NumSignBits(0), IsValid(true) {}
   };
 
   /// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND)
@@ -298,4 +297,4 @@ private:
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index e6afcbc8ded2..ad2599fc120e 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -1,4 +1,4 @@
-//===-- GCMetadata.h - Garbage collector metadata ---------------*- C++ -*-===//
+//===- GCMetadata.h - Garbage collector metadata ----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -36,15 +36,20 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Pass.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
 #include <memory>
-#include <utility>
+#include <vector>
 
 namespace llvm {
-class AsmPrinter;
+
 class Constant;
+class Function;
 class MCSymbol;
 
 /// GCPoint - Metadata for a collector-safe point in machine code.
@@ -62,20 +67,20 @@ struct GCPoint {
 /// collector.
 struct GCRoot {
   int Num;                  ///< Usually a frame index.
-  int StackOffset;          ///< Offset from the stack pointer.
+  int StackOffset = -1;     ///< Offset from the stack pointer.
   const Constant *Metadata; ///< Metadata straight from the call
                             ///< to llvm.gcroot.
 
-  GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {}
+  GCRoot(int N, const Constant *MD) : Num(N), Metadata(MD) {}
 };
 
 /// Garbage collection metadata for a single function.  Currently, this
 /// information only applies to GCStrategies which use GCRoot.
 class GCFunctionInfo {
 public:
-  typedef std::vector<GCPoint>::iterator iterator;
-  typedef std::vector<GCRoot>::iterator roots_iterator;
-  typedef std::vector<GCRoot>::const_iterator live_iterator;
+  using iterator = std::vector<GCPoint>::iterator;
+  using roots_iterator = std::vector<GCRoot>::iterator;
+  using live_iterator = std::vector<GCRoot>::const_iterator;
 
 private:
   const Function &F;
@@ -99,11 +104,9 @@ public:
   ~GCFunctionInfo();
 
   /// getFunction - Return the function to which this metadata applies.
-  ///
   const Function &getFunction() const { return F; }
 
   /// getStrategy - Return the GC strategy for the function.
-  ///
   GCStrategy &getStrategy() { return S; }
 
   /// addStackRoot - Registers a root that lives on the stack. Num is the
@@ -126,24 +129,20 @@ public:
   }
 
   /// getFrameSize/setFrameSize - Records the function's frame size.
-  ///
   uint64_t getFrameSize() const { return FrameSize; }
   void setFrameSize(uint64_t S) { FrameSize = S; }
 
   /// begin/end - Iterators for safe points.
-  ///
   iterator begin() { return SafePoints.begin(); }
   iterator end() { return SafePoints.end(); }
   size_t size() const { return SafePoints.size(); }
 
   /// roots_begin/roots_end - Iterators for all roots in the function.
-  ///
   roots_iterator roots_begin() { return Roots.begin(); }
   roots_iterator roots_end() { return Roots.end(); }
   size_t roots_size() const { return Roots.size(); }
 
   /// live_begin/live_end - Iterators for live roots at a given safe point.
-  ///
   live_iterator live_begin(const iterator &p) { return roots_begin(); }
   live_iterator live_end(const iterator &p) { return roots_end(); }
   size_t live_size(const iterator &p) const { return roots_size(); }
@@ -166,7 +165,7 @@ public:
 
   /// List of per function info objects.  In theory, Each of these
   /// may be associated with a different GC.
-  typedef std::vector<std::unique_ptr<GCFunctionInfo>> FuncInfoVec;
+  using FuncInfoVec = std::vector<std::unique_ptr<GCFunctionInfo>>;
 
   FuncInfoVec::iterator funcinfo_begin() { return Functions.begin(); }
   FuncInfoVec::iterator funcinfo_end() { return Functions.end(); }
@@ -177,11 +176,11 @@ private:
 
   /// Non-owning map to bypass linear search when finding the GCFunctionInfo
   /// associated with a particular Function.
-  typedef DenseMap<const Function *, GCFunctionInfo *> finfo_map_type;
+  using finfo_map_type = DenseMap<const Function *, GCFunctionInfo *>;
   finfo_map_type FInfoMap;
 
 public:
-  typedef SmallVector<std::unique_ptr<GCStrategy>,1>::const_iterator iterator;
+  using iterator = SmallVector<std::unique_ptr<GCStrategy>, 1>::const_iterator;
 
   static char ID;
 
@@ -202,6 +201,7 @@ public:
   /// will soon change.
   GCFunctionInfo &getFunctionInfo(const Function &F);
 };
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_GCMETADATA_H
diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h
index 220847029113..1cc69a7b71af 100644
--- a/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables -*- C++ -*-===//
+//===- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -20,45 +20,48 @@
 #ifndef LLVM_CODEGEN_GCMETADATAPRINTER_H
 #define LLVM_CODEGEN_GCMETADATAPRINTER_H
 
-#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/Support/Registry.h"
 
 namespace llvm {
 
+class AsmPrinter;
 class GCMetadataPrinter;
+class GCModuleInfo;
+class GCStrategy;
+class Module;
 
 /// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the
 /// defaults from Registry.
-typedef Registry<GCMetadataPrinter> GCMetadataPrinterRegistry;
+using GCMetadataPrinterRegistry = Registry<GCMetadataPrinter>;
 
 /// GCMetadataPrinter - Emits GC metadata as assembly code.  Instances are
 /// created, managed, and owned by the AsmPrinter.
 class GCMetadataPrinter {
 private:
-  GCStrategy *S;
   friend class AsmPrinter;
 
+  GCStrategy *S;
+
 protected:
   // May only be subclassed.
   GCMetadataPrinter();
 
-private:
+public:
   GCMetadataPrinter(const GCMetadataPrinter &) = delete;
   GCMetadataPrinter &operator=(const GCMetadataPrinter &) = delete;
+  virtual ~GCMetadataPrinter();
 
-public:
   GCStrategy &getStrategy() { return *S; }
 
   /// Called before the assembly for the module is generated by
   /// the AsmPrinter (but after target specific hooks.)
   virtual void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {}
+
   /// Called after the assembly for the module is generated by
   /// the AsmPrinter (but before target specific hooks)
   virtual void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {}
-
-  virtual ~GCMetadataPrinter();
 };
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_GCMETADATAPRINTER_H
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index 5b1fafea25b5..16168e785f81 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -174,7 +174,7 @@ public:
 /// Note that to use a custom GCMetadataPrinter w/gc.roots, you must also
 /// register your GCMetadataPrinter subclass with the
 /// GCMetadataPrinterRegistery as well.
-typedef Registry<GCStrategy> GCRegistry;
+using GCRegistry = Registry<GCStrategy>;
 
 } // end namespace llvm
 
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 45f25f96ec1f..1a865c3f0dce 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -17,8 +17,8 @@
 #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
 
 #include "llvm/ADT/Optional.h"
-#include <cstdint>
 #include <bitset>
+#include <cstdint>
 #include <functional>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 8fecafdc08d0..3148e70b56f8 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -22,8 +22,8 @@
 #define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H
 
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 
 namespace llvm {
 // Forward declarations.
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 6b662a7f7413..db72f78c8321 100644
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -16,9 +16,9 @@
 
 #include "llvm/CodeGen/GlobalISel/Types.h"
 
+#include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugLoc.h"
 
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index 6c35832f963c..79fa12ec2fbb 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -31,12 +31,13 @@ namespace llvm {
 class MachineBasicBlock;
 class MachineFunction;
 class MachineInstr;
+class MDNode;
 
 //===----------------------------------------------------------------------===//
 /// InsnRange - This is used to track range of instructions with identical
 /// lexical scope.
 ///
-typedef std::pair<const MachineInstr *, const MachineInstr *> InsnRange;
+using InsnRange = std::pair<const MachineInstr *, const MachineInstr *>;
 
 //===----------------------------------------------------------------------===//
 /// LexicalScope - This class is used to track scope information.
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 40cd146f88f8..f4fa872c7f5b 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -23,9 +23,9 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/Support/Allocator.h"
diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h
index 5de76c8b87bf..fa1ec867ea3d 100644
--- a/include/llvm/CodeGen/LiveRegUnits.h
+++ b/include/llvm/CodeGen/LiveRegUnits.h
@@ -16,9 +16,9 @@
 #define LLVM_CODEGEN_LIVEREGUNITS_H
 
 #include "llvm/ADT/BitVector.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <cstdint>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h
index dd0780397f42..b631a8c0122a 100644
--- a/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -18,7 +18,6 @@
 #ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H
 #define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H
 
-#include "llvm/CodeGen/MachineFunctionInitializer.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <memory>
@@ -27,29 +26,30 @@ namespace llvm {
 
 class StringRef;
 class MIRParserImpl;
+class MachineModuleInfo;
 class SMDiagnostic;
 
 /// This class initializes machine functions by applying the state loaded from
 /// a MIR file.
-class MIRParser : public MachineFunctionInitializer {
+class MIRParser {
   std::unique_ptr<MIRParserImpl> Impl;
 
 public:
   MIRParser(std::unique_ptr<MIRParserImpl> Impl);
   MIRParser(const MIRParser &) = delete;
-  ~MIRParser() override;
+  ~MIRParser();
 
-  /// Parse the optional LLVM IR module that's embedded in the MIR file.
+  /// Parses the optional LLVM IR module in the MIR file.
   ///
   /// A new, empty module is created if the LLVM IR isn't present.
-  /// Returns null if a parsing error occurred.
-  std::unique_ptr<Module> parseLLVMModule();
+  /// \returns nullptr if a parsing error occurred.
+  std::unique_ptr<Module> parseIRModule();
 
-  /// Initialize the machine function to the state that's described in the MIR
-  /// file.
+  /// \brief Parses MachineFunctions in the MIR file and adds them to the given
+  /// MachineModuleInfo \p MMI.
   ///
-  /// Return true if error occurred.
-  bool initializeMachineFunction(MachineFunction &MF) override;
+  /// \returns true if an error occurred.
+  bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
 };
 
 /// This function is the main interface to the MIR serialization format parser.
diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h
index 30e88fe38ac3..1b1ba6a05837 100644
--- a/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/include/llvm/CodeGen/MIRYamlMapping.h
@@ -72,6 +72,9 @@ template <> struct ScalarTraits<FlowStringValue> {
 
 struct BlockStringValue {
   StringValue Value;
+  bool operator==(const BlockStringValue &Other) const {
+    return Value == Other.Value;
+  }
 };
 
 template <> struct BlockScalarTraits<BlockStringValue> {
@@ -146,6 +149,10 @@ struct VirtualRegisterDefinition {
   StringValue Class;
   StringValue PreferredRegister;
   // TODO: Serialize the target specific register hints.
+  bool operator==(const VirtualRegisterDefinition &Other) const {
+    return ID == Other.ID && Class == Other.Class &&
+           PreferredRegister == Other.PreferredRegister;
+  }
 };
 
 template <> struct MappingTraits<VirtualRegisterDefinition> {
@@ -162,6 +169,10 @@ template <> struct MappingTraits<VirtualRegisterDefinition> {
 struct MachineFunctionLiveIn {
   StringValue Register;
   StringValue VirtualRegister;
+  bool operator==(const MachineFunctionLiveIn &Other) const {
+    return Register == Other.Register &&
+           VirtualRegister == Other.VirtualRegister;
+  }
 };
 
 template <> struct MappingTraits<MachineFunctionLiveIn> {
@@ -196,6 +207,14 @@ struct MachineStackObject {
   StringValue DebugVar;
   StringValue DebugExpr;
   StringValue DebugLoc;
+  bool operator==(const MachineStackObject &Other) const {
+    return ID == Other.ID && Name == Other.Name && Type == Other.Type &&
+           Offset == Other.Offset && Size == Other.Size &&
+           Alignment == Other.Alignment &&
+           CalleeSavedRegister == Other.CalleeSavedRegister &&
+           LocalOffset == Other.LocalOffset && DebugVar == Other.DebugVar &&
+           DebugExpr == Other.DebugExpr && DebugLoc == Other.DebugLoc;
+  }
 };
 
 template <> struct ScalarEnumerationTraits<MachineStackObject::ObjectType> {
@@ -214,13 +233,13 @@ template <> struct MappingTraits<MachineStackObject> {
     YamlIO.mapOptional(
         "type", Object.Type,
         MachineStackObject::DefaultType); // Don't print the default type.
-    YamlIO.mapOptional("offset", Object.Offset);
+    YamlIO.mapOptional("offset", Object.Offset, (int64_t)0);
     if (Object.Type != MachineStackObject::VariableSized)
       YamlIO.mapRequired("size", Object.Size);
-    YamlIO.mapOptional("alignment", Object.Alignment);
+    YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
     YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister,
                        StringValue()); // Don't print it out when it's empty.
-    YamlIO.mapOptional("local-offset", Object.LocalOffset);
+    YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional<int64_t>());
     YamlIO.mapOptional("di-variable", Object.DebugVar,
                        StringValue()); // Don't print it out when it's empty.
     YamlIO.mapOptional("di-expression", Object.DebugExpr,
@@ -244,6 +263,12 @@ struct FixedMachineStackObject {
   bool IsImmutable = false;
   bool IsAliased = false;
   StringValue CalleeSavedRegister;
+  bool operator==(const FixedMachineStackObject &Other) const {
+    return ID == Other.ID && Type == Other.Type && Offset == Other.Offset &&
+           Size == Other.Size && Alignment == Other.Alignment &&
+           IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased &&
+           CalleeSavedRegister == Other.CalleeSavedRegister;
+  }
 };
 
 template <>
@@ -261,12 +286,12 @@ template <> struct MappingTraits<FixedMachineStackObject> {
     YamlIO.mapOptional(
         "type", Object.Type,
         FixedMachineStackObject::DefaultType); // Don't print the default type.
-    YamlIO.mapOptional("offset", Object.Offset);
-    YamlIO.mapOptional("size", Object.Size);
-    YamlIO.mapOptional("alignment", Object.Alignment);
+    YamlIO.mapOptional("offset", Object.Offset, (int64_t)0);
+    YamlIO.mapOptional("size", Object.Size, (uint64_t)0);
+    YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
     if (Object.Type != FixedMachineStackObject::SpillSlot) {
-      YamlIO.mapOptional("isImmutable", Object.IsImmutable);
-      YamlIO.mapOptional("isAliased", Object.IsAliased);
+      YamlIO.mapOptional("isImmutable", Object.IsImmutable, false);
+      YamlIO.mapOptional("isAliased", Object.IsAliased, false);
     }
     YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister,
                        StringValue()); // Don't print it out when it's empty.
@@ -279,13 +304,17 @@ struct MachineConstantPoolValue {
   UnsignedValue ID;
   StringValue Value;
   unsigned Alignment = 0;
+  bool operator==(const MachineConstantPoolValue &Other) const {
+    return ID == Other.ID && Value == Other.Value &&
+           Alignment == Other.Alignment;
+  }
 };
 
 template <> struct MappingTraits<MachineConstantPoolValue> {
   static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) {
     YamlIO.mapRequired("id", Constant.ID);
-    YamlIO.mapOptional("value", Constant.Value);
-    YamlIO.mapOptional("alignment", Constant.Alignment);
+    YamlIO.mapOptional("value", Constant.Value, StringValue());
+    YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0);
   }
 };
 
@@ -293,16 +322,22 @@ struct MachineJumpTable {
   struct Entry {
     UnsignedValue ID;
     std::vector<FlowStringValue> Blocks;
+    bool operator==(const Entry &Other) const {
+      return ID == Other.ID && Blocks == Other.Blocks;
+    }
   };
 
   MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32;
   std::vector<Entry> Entries;
+  bool operator==(const MachineJumpTable &Other) const {
+    return Kind == Other.Kind && Entries == Other.Entries;
+  }
 };
 
 template <> struct MappingTraits<MachineJumpTable::Entry> {
   static void mapping(IO &YamlIO, MachineJumpTable::Entry &Entry) {
     YamlIO.mapRequired("id", Entry.ID);
-    YamlIO.mapOptional("blocks", Entry.Blocks);
+    YamlIO.mapOptional("blocks", Entry.Blocks, std::vector<FlowStringValue>());
   }
 };
 
@@ -322,7 +357,8 @@ namespace yaml {
 template <> struct MappingTraits<MachineJumpTable> {
   static void mapping(IO &YamlIO, MachineJumpTable &JT) {
     YamlIO.mapRequired("kind", JT.Kind);
-    YamlIO.mapOptional("entries", JT.Entries);
+    YamlIO.mapOptional("entries", JT.Entries,
+                       std::vector<MachineJumpTable::Entry>());
   }
 };
 
@@ -351,25 +387,43 @@ struct MachineFrameInfo {
   bool HasMustTailInVarArgFunc = false;
   StringValue SavePoint;
   StringValue RestorePoint;
+  bool operator==(const MachineFrameInfo &Other) const {
+    return IsFrameAddressTaken == Other.IsFrameAddressTaken &&
+           IsReturnAddressTaken == Other.IsReturnAddressTaken &&
+           HasStackMap == Other.HasStackMap &&
+           HasPatchPoint == Other.HasPatchPoint &&
+           StackSize == Other.StackSize &&
+           OffsetAdjustment == Other.OffsetAdjustment &&
+           MaxAlignment == Other.MaxAlignment &&
+           AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls &&
+           StackProtector == Other.StackProtector &&
+           MaxCallFrameSize == Other.MaxCallFrameSize &&
+           HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment &&
+           HasVAStart == Other.HasVAStart &&
+           HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc &&
+           SavePoint == Other.SavePoint && RestorePoint == Other.RestorePoint;
+  }
 };
 
 template <> struct MappingTraits<MachineFrameInfo> {
   static void mapping(IO &YamlIO, MachineFrameInfo &MFI) {
-    YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken);
-    YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken);
-    YamlIO.mapOptional("hasStackMap", MFI.HasStackMap);
-    YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint);
-    YamlIO.mapOptional("stackSize", MFI.StackSize);
-    YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment);
-    YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment);
-    YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack);
-    YamlIO.mapOptional("hasCalls", MFI.HasCalls);
+    YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken, false);
+    YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken, false);
+    YamlIO.mapOptional("hasStackMap", MFI.HasStackMap, false);
+    YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint, false);
+    YamlIO.mapOptional("stackSize", MFI.StackSize, (uint64_t)0);
+    YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment, (int)0);
+    YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment, (unsigned)0);
+    YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack, false);
+    YamlIO.mapOptional("hasCalls", MFI.HasCalls, false);
     YamlIO.mapOptional("stackProtector", MFI.StackProtector,
                        StringValue()); // Don't print it out when it's empty.
-    YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, ~0u);
-    YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment);
-    YamlIO.mapOptional("hasVAStart", MFI.HasVAStart);
-    YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc);
+    YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0);
+    YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment,
+                       false);
+    YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false);
+    YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc,
+                       false);
     YamlIO.mapOptional("savePoint", MFI.SavePoint,
                        StringValue()); // Don't print it out when it's empty.
     YamlIO.mapOptional("restorePoint", MFI.RestorePoint,
@@ -403,22 +457,28 @@ struct MachineFunction {
 template <> struct MappingTraits<MachineFunction> {
   static void mapping(IO &YamlIO, MachineFunction &MF) {
     YamlIO.mapRequired("name", MF.Name);
-    YamlIO.mapOptional("alignment", MF.Alignment);
-    YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice);
-    YamlIO.mapOptional("legalized", MF.Legalized);
-    YamlIO.mapOptional("regBankSelected", MF.RegBankSelected);
-    YamlIO.mapOptional("selected", MF.Selected);
-    YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness);
-    YamlIO.mapOptional("registers", MF.VirtualRegisters);
-    YamlIO.mapOptional("liveins", MF.LiveIns);
-    YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters);
-    YamlIO.mapOptional("frameInfo", MF.FrameInfo);
-    YamlIO.mapOptional("fixedStack", MF.FixedStackObjects);
-    YamlIO.mapOptional("stack", MF.StackObjects);
-    YamlIO.mapOptional("constants", MF.Constants);
+    YamlIO.mapOptional("alignment", MF.Alignment, (unsigned)0);
+    YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false);
+    YamlIO.mapOptional("legalized", MF.Legalized, false);
+    YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false);
+    YamlIO.mapOptional("selected", MF.Selected, false);
+    YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false);
+    YamlIO.mapOptional("registers", MF.VirtualRegisters,
+                       std::vector<VirtualRegisterDefinition>());
+    YamlIO.mapOptional("liveins", MF.LiveIns,
+                       std::vector<MachineFunctionLiveIn>());
+    YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters,
+                       Optional<std::vector<FlowStringValue>>());
+    YamlIO.mapOptional("frameInfo", MF.FrameInfo, MachineFrameInfo());
+    YamlIO.mapOptional("fixedStack", MF.FixedStackObjects,
+                       std::vector<FixedMachineStackObject>());
+    YamlIO.mapOptional("stack", MF.StackObjects,
+                       std::vector<MachineStackObject>());
+    YamlIO.mapOptional("constants", MF.Constants,
+                       std::vector<MachineConstantPoolValue>());
     if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty())
-      YamlIO.mapOptional("jumpTable", MF.JumpTableInfo);
-    YamlIO.mapOptional("body", MF.Body);
+      YamlIO.mapOptional("jumpTable", MF.JumpTableInfo, MachineJumpTable());
+    YamlIO.mapOptional("body", MF.Body, BlockStringValue());
   }
 };
 
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 26ed8bb487a2..051908c40df7 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -19,12 +19,12 @@
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/simple_ilist.h"
-#include "llvm/CodeGen/MachineInstrBundleIterator.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/Support/BranchProbability.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
 #include <cassert>
 #include <cstdint>
 #include <functional>
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 10125864cd90..f67da7b01c54 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -22,11 +22,11 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/iterator.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
diff --git a/include/llvm/CodeGen/MachineFunctionInitializer.h b/include/llvm/CodeGen/MachineFunctionInitializer.h
deleted file mode 100644
index 0fbcb480b1ab..000000000000
--- a/include/llvm/CodeGen/MachineFunctionInitializer.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//=- MachineFunctionInitializer.h - machine function initializer --*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares an interface that allows custom machine function
-// initialization.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H
-#define LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H
-
-namespace llvm {
-
-class MachineFunction;
-
-/// This interface provides a way to initialize machine functions after they are
-/// created by the machine function analysis pass.
-class MachineFunctionInitializer {
-  virtual void anchor();
-
-public:
-  virtual ~MachineFunctionInitializer() = default;
-
-  /// Initialize the machine function.
-  ///
-  /// Return true if error occurred.
-  virtual bool initializeMachineFunction(MachineFunction &MF) = 0;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H
diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h
index 653d1175d04b..6d978daa2018 100644
--- a/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/include/llvm/CodeGen/MachineFunctionPass.h
@@ -19,8 +19,8 @@
 #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H
 #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H
 
-#include "llvm/Pass.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index a311124a35ba..4d83f27eac3c 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -21,7 +21,7 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/IR/Value.h"  // PointerLikeTypeTraits<Value*>
+#include "llvm/IR/Value.h" // PointerLikeTypeTraits<Value*>
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/DataTypes.h"
 
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index d4ac58c3bd22..d64941a9e725 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -47,7 +47,6 @@ class BasicBlock;
 class CallInst;
 class Function;
 class MachineFunction;
-class MachineFunctionInitializer;
 class MMIAddrLabelMap;
 class Module;
 class TargetMachine;
@@ -126,7 +125,6 @@ class MachineModuleInfo : public ImmutablePass {
   /// comments in lib/Target/X86/X86FrameLowering.cpp for more details.
   bool UsesMorestackAddr;
 
-  MachineFunctionInitializer *MFInitializer;
   /// Maps IR Functions to their corresponding MachineFunctions.
   DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions;
   /// Next unique number available for a MachineFunction.
@@ -150,14 +148,13 @@ public:
   void setModule(const Module *M) { TheModule = M; }
   const Module *getModule() const { return TheModule; }
 
-  void setMachineFunctionInitializer(MachineFunctionInitializer *MFInit) {
-    MFInitializer = MFInit;
-  }
-
   /// Returns the MachineFunction constructed for the IR function \p F.
-  /// Creates a new MachineFunction and runs the MachineFunctionInitializer
-  /// if none exists yet.
-  MachineFunction &getMachineFunction(const Function &F);
+  /// Creates a new MachineFunction if none exists yet.
+  MachineFunction &getOrCreateMachineFunction(const Function &F);
+
+  /// \brief Returns the MachineFunction associated to IR function \p F if there
+  /// is one, otherwise nullptr.
+  MachineFunction *getMachineFunction(const Function &F) const;
 
   /// Delete the MachineFunction \p MF and reset the link in the IR Function to
   /// Machine Function map.
diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h
index f28a79c5b5cc..61cff3890b75 100644
--- a/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -15,9 +15,9 @@
 #ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
 #define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
 
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Support/Wasm.h"
+#include "llvm/CodeGen/ValueTypes.h"
 
 namespace llvm {
 class MCSymbol;
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index e16354088296..2560399bcf54 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_CODEGEN_MACHINEOPERAND_H
 #define LLVM_CODEGEN_MACHINEOPERAND_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index c027783aae55..8347f00cbc7a 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -18,9 +18,9 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -30,13 +30,13 @@
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include <vector>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
 #include <memory>
 #include <utility>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h
index 5c7e9999cc9a..481747dc163e 100644
--- a/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/include/llvm/CodeGen/RegAllocRegistry.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/RegAllocRegistry.h -------------------------*- C++ -*-===//
+//===- llvm/CodeGen/RegAllocRegistry.h --------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,16 +19,16 @@
 
 namespace llvm {
 
+class FunctionPass;
+
 //===----------------------------------------------------------------------===//
 ///
 /// RegisterRegAlloc class - Track the registration of register allocators.
 ///
 //===----------------------------------------------------------------------===//
 class RegisterRegAlloc : public MachinePassRegistryNode {
-
 public:
-
-  typedef FunctionPass *(*FunctionPassCtor)();
+  using FunctionPassCtor = FunctionPass *(*)();
 
   static MachinePassRegistry Registry;
 
@@ -36,22 +36,26 @@ public:
       : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
     Registry.Add(this);
   }
+
   ~RegisterRegAlloc() { Registry.Remove(this); }
 
   // Accessors.
-  //
   RegisterRegAlloc *getNext() const {
     return (RegisterRegAlloc *)MachinePassRegistryNode::getNext();
   }
+
   static RegisterRegAlloc *getList() {
     return (RegisterRegAlloc *)Registry.getList();
   }
+
   static FunctionPassCtor getDefault() {
     return (FunctionPassCtor)Registry.getDefault();
   }
+
   static void setDefault(FunctionPassCtor C) {
     Registry.setDefault((MachinePassCtor)C);
   }
+
   static void setListener(MachinePassRegistryListener *L) {
     Registry.setListener(L);
   }
@@ -59,5 +63,4 @@ public:
 
 } // end namespace llvm
 
-
-#endif
+#endif // LLVM_CODEGEN_REGALLOCREGISTRY_H
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index a3ea41d5236e..e997aaf269e3 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -32,7 +32,9 @@
 namespace llvm {
 
 class LiveIntervals;
+class MachineFunction;
 class MachineInstr;
+class MachineRegisterInfo;
 class RegisterClassInfo;
 
 struct RegisterMaskPair {
@@ -147,12 +149,14 @@ class PressureDiff {
 
   PressureChange PressureChanges[MaxPSets];
 
-  typedef PressureChange* iterator;
+  using iterator = PressureChange *;
+
   iterator nonconst_begin() { return &PressureChanges[0]; }
   iterator nonconst_end() { return &PressureChanges[MaxPSets]; }
 
 public:
-  typedef const PressureChange* const_iterator;
+  using const_iterator = const PressureChange *;
+
   const_iterator begin() const { return &PressureChanges[0]; }
   const_iterator end() const { return &PressureChanges[MaxPSets]; }
 
@@ -269,7 +273,7 @@ private:
     }
   };
 
-  typedef SparseSet<IndexMaskPair> RegSet;
+  using RegSet = SparseSet<IndexMaskPair>;
   RegSet Regs;
   unsigned NumRegUnits;
 
diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h
index 3f88032cb638..0a04bc6a89f4 100644
--- a/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -1,4 +1,4 @@
-//==- RegisterUsageInfo.h - Register Usage Informartion Storage -*- C++ -*-===//
+//==- RegisterUsageInfo.h - Register Usage Information Storage ---*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -20,15 +20,15 @@
 #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+#include <vector>
 
 namespace llvm {
 
+class Function;
+class TargetMachine;
+
 class PhysicalRegisterUsageInfo : public ImmutablePass {
   virtual void anchor();
 
@@ -70,6 +70,7 @@ private:
 
   const TargetMachine *TM;
 };
-}
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 97aa2aace822..4d72eda5c71a 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -18,9 +18,9 @@
 
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/iterator.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLowering.h"
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index d62bb9bf0b75..218e22e40234 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -17,10 +17,10 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseMultiSet.h"
 #include "llvm/ADT/SparseSet.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h
index c2013661cfff..d6a8c791392c 100644
--- a/include/llvm/CodeGen/ScheduleDFS.h
+++ b/include/llvm/CodeGen/ScheduleDFS.h
@@ -17,9 +17,9 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include <vector>
 #include <cassert>
 #include <cstdint>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index a7a6227664de..badf927d0e95 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/SchedulerRegistry.h ------------------------*- C++ -*-===//
+//===- llvm/CodeGen/SchedulerRegistry.h -------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,7 +16,7 @@
 #define LLVM_CODEGEN_SCHEDULERREGISTRY_H
 
 #include "llvm/CodeGen/MachinePassRegistry.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CodeGen.h"
 
 namespace llvm {
 
@@ -26,15 +26,13 @@ namespace llvm {
 ///
 //===----------------------------------------------------------------------===//
 
-class SelectionDAGISel;
 class ScheduleDAGSDNodes;
-class SelectionDAG;
-class MachineBasicBlock;
+class SelectionDAGISel;
 
 class RegisterScheduler : public MachinePassRegistryNode {
 public:
-  typedef ScheduleDAGSDNodes *(*FunctionPassCtor)(SelectionDAGISel*,
-                                                  CodeGenOpt::Level);
+  using FunctionPassCtor = ScheduleDAGSDNodes *(*)(SelectionDAGISel*,
+                                                   CodeGenOpt::Level);
 
   static MachinePassRegistry Registry;
 
@@ -45,13 +43,14 @@ public:
 
 
   // Accessors.
-  //
   RegisterScheduler *getNext() const {
     return (RegisterScheduler *)MachinePassRegistryNode::getNext();
   }
+
   static RegisterScheduler *getList() {
     return (RegisterScheduler *)Registry.getList();
   }
+
   static void setListener(MachinePassRegistryListener *L) {
     Registry.setListener(L);
   }
@@ -103,4 +102,4 @@ ScheduleDAGSDNodes *createDAGLinearizer(SelectionDAGISel *IS,
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_CODEGEN_SCHEDULERREGISTRY_H
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 4b1a375abd57..2ef7796a4a07 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -21,12 +21,12 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/iterator.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -1229,39 +1229,6 @@ public:
     AllNodes.insert(Position, AllNodes.remove(N));
   }
 
-  /// Returns true if the opcode is a commutative binary operation.
-  static bool isCommutativeBinOp(unsigned Opcode) {
-    // FIXME: This should get its info from the td file, so that we can include
-    // target info.
-    switch (Opcode) {
-    case ISD::ADD:
-    case ISD::SMIN:
-    case ISD::SMAX:
-    case ISD::UMIN:
-    case ISD::UMAX:
-    case ISD::MUL:
-    case ISD::MULHU:
-    case ISD::MULHS:
-    case ISD::SMUL_LOHI:
-    case ISD::UMUL_LOHI:
-    case ISD::FADD:
-    case ISD::FMUL:
-    case ISD::AND:
-    case ISD::OR:
-    case ISD::XOR:
-    case ISD::SADDO:
-    case ISD::UADDO:
-    case ISD::ADDC:
-    case ISD::ADDE:
-    case ISD::FMINNUM:
-    case ISD::FMAXNUM:
-    case ISD::FMINNAN:
-    case ISD::FMAXNAN:
-      return true;
-    default: return false;
-    }
-  }
-
   /// Returns an APFloat semantics tag appropriate for the given type. If VT is
   /// a vector type, the element semantics are returned.
   static const fltSemantics &EVTToAPFloatSemantics(EVT VT) {
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 3a4feb322092..0cd26d35a482 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -24,11 +24,11 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineValueType.h"
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index a275b2721b44..a7b16e7a9ed2 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -20,10 +20,10 @@
 #define LLVM_CODEGEN_SLOTINDEXES_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/ilist.h"
 #include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ilist.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h
index b970de71f862..72de212d0df9 100644
--- a/include/llvm/CodeGen/StackProtector.h
+++ b/include/llvm/CodeGen/StackProtector.h
@@ -19,18 +19,20 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 
+class BasicBlock;
+class DominatorTree;
 class Function;
+class Instruction;
 class Module;
-class PHINode;
+class TargetLoweringBase;
+class TargetMachine;
+class Type;
 
 class StackProtector : public FunctionPass {
 public:
@@ -48,7 +50,7 @@ public:
   };
 
   /// A mapping of AllocaInsts to their required SSP layout.
-  typedef ValueMap<const AllocaInst *, SSPLayoutKind> SSPLayoutMap;
+  using SSPLayoutMap = ValueMap<const AllocaInst *, SSPLayoutKind>;
 
 private:
   const TargetMachine *TM = nullptr;
@@ -119,10 +121,7 @@ public:
     initializeStackProtectorPass(*PassRegistry::getPassRegistry());
   }
 
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<TargetPassConfig>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
-  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
 
   SSPLayoutKind getSSPLayout(const AllocaInst *AI) const;
 
diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h
index b667245fd3c0..483c0ab1eec9 100644
--- a/include/llvm/CodeGen/TailDuplicator.h
+++ b/include/llvm/CodeGen/TailDuplicator.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/TailDuplicator.h ---------------------------*- C++ -*-===//
+//===- llvm/CodeGen/TailDuplicator.h ----------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,19 +15,27 @@
 #ifndef LLVM_CODEGEN_TAILDUPLICATOR_H
 #define LLVM_CODEGEN_TAILDUPLICATOR_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <utility>
+#include <vector>
 
 namespace llvm {
 
-extern cl::opt<unsigned> TailDupIndirectBranchSize;
+class MachineBasicBlock;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineInstr;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
 
 /// Utility class to perform tail duplication.
 class TailDuplicator {
@@ -46,7 +54,7 @@ class TailDuplicator {
 
   // For each virtual register in SSAUpdateVals keep a list of source virtual
   // registers.
-  typedef std::vector<std::pair<MachineBasicBlock *, unsigned>> AvailableValsTy;
+  using AvailableValsTy = std::vector<std::pair<MachineBasicBlock *, unsigned>>;
 
   DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
 
@@ -62,11 +70,14 @@ public:
   void initMF(MachineFunction &MF,
               const MachineBranchProbabilityInfo *MBPI,
               bool LayoutMode, unsigned TailDupSize = 0);
+
   bool tailDuplicateBlocks();
   static bool isSimpleBB(MachineBasicBlock *TailBB);
   bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB);
+
   /// Returns true if TailBB can successfully be duplicated into PredBB
   bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB);
+
   /// Tail duplicate a single basic block into its predecessors, and then clean
   /// up.
   /// If \p DuplicatePreds is not null, it will be updated to contain the list
@@ -77,10 +88,10 @@ public:
       bool IsSimple, MachineBasicBlock *MBB,
       MachineBasicBlock *ForcedLayoutPred,
       SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds = nullptr,
-      llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
+      function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
 
 private:
-  typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
+  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
 
   void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
                          MachineBasicBlock *BB);
@@ -112,9 +123,9 @@ private:
 
   void removeDeadBlock(
       MachineBasicBlock *MBB,
-      llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
+      function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
 };
 
-} // End llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_CODEGEN_TAILDUPLICATOR_H
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index adf2b3ea1c9b..106a084a95c0 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -41,6 +41,11 @@ public:
   TargetLoweringObjectFileELF() = default;
   ~TargetLoweringObjectFileELF() override = default;
 
+  /// Emit Obj-C garbage collection and linker options.
+  void emitModuleFlags(MCStreamer &Streamer,
+                       ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+                       const TargetMachine &TM) const override;
+
   void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM,
                             const MCSymbol *Sym) const override;
 
@@ -149,8 +154,7 @@ public:
   MCSection *getSectionForJumpTable(const Function &F,
                                     const TargetMachine &TM) const override;
 
-  /// Emit Obj-C garbage collection and linker options. Only linker option
-  /// emission is implemented for COFF.
+  /// Emit Obj-C garbage collection and linker options.
   void emitModuleFlags(MCStreamer &Streamer,
                        ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
                        const TargetMachine &TM) const override;
diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h
index fcf1937c186e..c109b7489cca 100644
--- a/include/llvm/CodeGen/TargetPassConfig.h
+++ b/include/llvm/CodeGen/TargetPassConfig.h
@@ -119,6 +119,10 @@ protected:
   /// callers.
   bool RequireCodeGenSCCOrder;
 
+  /// Add the actual instruction selection passes. This does not include
+  /// preparation passes on IR.
+  bool addCoreISelPasses();
+
 public:
   TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm);
   // Dummy constructor.
@@ -206,6 +210,13 @@ public:
   /// has not be overriden on the command line with '-regalloc=...'
   bool usingDefaultRegAlloc() const;
 
+  /// High level function that adds all passes necessary to go from llvm IR
+  /// representation to the MI representation.
+  /// Adds IR based lowering and target specific optimization passes and finally
+  /// the core instruction selection passes.
+  /// \returns true if an error occurred, false otherwise.
+  bool addISelPasses();
+
   /// Add common target configurable passes that perform LLVM IR to IR
   /// transforms following machine independent optimization.
   virtual void addIRPasses();
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 4365fca74bf1..f23667976468 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_CODEGEN_TARGETSCHEDULE_H
 #define LLVM_CODEGEN_TARGETSCHEDULE_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/MC/MCSchedule.h"
@@ -123,7 +124,7 @@ public:
   }
 #endif
 
-  typedef const MCWriteProcResEntry *ProcResIter;
+  using ProcResIter = const MCWriteProcResEntry *;
 
   // \brief Get an iterator into the processor resources consumed by this
   // scheduling class.
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index d7e92094877d..b9076353fd07 100644
--- a/include/llvm/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -102,14 +102,7 @@ namespace llvm {
 
     /// @brief creates a mapping for the specified virtual register to
     /// the specified physical register
-    void assignVirt2Phys(unsigned virtReg, unsigned physReg) {
-      assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
-             TargetRegisterInfo::isPhysicalRegister(physReg));
-      assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
-             "attempt to assign physical register to already mapped "
-             "virtual register");
-      Virt2PhysMap[virtReg] = physReg;
-    }
+    void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg);
 
     /// @brief clears the specified virtual register's, physical
     /// register mapping
diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake
index 4ce487b8f5f3..7ae401e5b8a8 100644
--- a/include/llvm/Config/abi-breaking.h.cmake
+++ b/include/llvm/Config/abi-breaking.h.cmake
@@ -15,6 +15,9 @@
 /* Define to enable checks that alter the LLVM C++ ABI */
 #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS
 
+/* Define to enable reverse iteration of unordered llvm containers */
+#cmakedefine01 LLVM_ENABLE_REVERSE_ITERATION
+
 /* Allow selectively disabling link-time mismatch checking so that header-only
    ADT content from LLVM can be used without linking libSupport. */
 #if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index a64e208fa784..8df15ef97026 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -350,27 +350,6 @@
 /* Host triple LLVM will be executed on */
 #cmakedefine LLVM_HOST_TRIPLE "${LLVM_HOST_TRIPLE}"
 
-/* LLVM architecture name for the native architecture, if available */
-#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
-
-/* LLVM name for the native AsmParser init function, if available */
-#cmakedefine LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser
-
-/* LLVM name for the native AsmPrinter init function, if available */
-#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
-
-/* LLVM name for the native Disassembler init function, if available */
-#cmakedefine LLVM_NATIVE_DISASSEMBLER LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler
-
-/* LLVM name for the native Target init function, if available */
-#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
-
-/* LLVM name for the native TargetInfo init function, if available */
-#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
-
-/* LLVM name for the native target MC init function, if available */
-#cmakedefine LLVM_NATIVE_TARGETMC LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC
-
 /* Define if this is Unixish platform */
 #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
 
diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h
index 68ad09982202..4c6bbedc6bbd 100644
--- a/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -62,10 +62,8 @@ template <typename Kind> struct RemappedRecord {
 
 template <typename Kind>
 struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> {
-  typedef void ContextType;
-
-  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       codeview::CVRecord<Kind> &Item) {
+  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
+                   codeview::CVRecord<Kind> &Item) {
     using namespace codeview;
     const RecordPrefix *Prefix = nullptr;
     BinaryStreamReader Reader(Stream);
diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h
index 9890263ae2d2..251c9d1ae62c 100644
--- a/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -575,6 +575,24 @@ struct FrameData {
   };
 };
 
+// Corresponds to LocalIdAndGlobalIdPair structure.
+// This structure allows cross-referencing between PDBs.  For
+// example, when a PDB is being built during compilation it is not yet known
+// what other modules may end up in the PDB at link time.  So certain types of
+// IDs may clash between the various compile time PDBs.  For each affected
+// module, a subsection would be put into the PDB containing a mapping from its
+// local IDs to a single ID namespace for all items in the PDB file.
+struct CrossModuleExport {
+  support::ulittle32_t Local;
+  support::ulittle32_t Global;
+};
+
+struct CrossModuleImport {
+  support::ulittle32_t ModuleNameOffset;
+  support::ulittle32_t Count; // Number of elements
+  // support::ulittle32_t ids[Count]; // id from referenced module
+};
+
 enum class CodeViewContainer { ObjectFile, Pdb };
 
 inline uint32_t alignOf(CodeViewContainer Container) {
diff --git a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
index c958a95ee6de..9fc90f13d347 100644
--- a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
@@ -36,8 +36,8 @@ template <> struct VarStreamArrayExtractor<codeview::FileChecksumEntry> {
 public:
   typedef void ContextType;
 
-  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       codeview::FileChecksumEntry &Item);
+  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
+                   codeview::FileChecksumEntry &Item);
 };
 }
 
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
new file mode 100644
index 000000000000..f755b23422c7
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
@@ -0,0 +1,64 @@
+//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H
+
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+
+#include <map>
+
+namespace llvm {
+namespace codeview {
+class DebugCrossModuleExportsSubsectionRef final : public DebugSubsectionRef {
+  typedef FixedStreamArray<CrossModuleExport> ReferenceArray;
+  typedef ReferenceArray::Iterator Iterator;
+
+public:
+  DebugCrossModuleExportsSubsectionRef()
+      : DebugSubsectionRef(DebugSubsectionKind::CrossScopeExports) {}
+
+  static bool classof(const DebugSubsectionRef *S) {
+    return S->kind() == DebugSubsectionKind::CrossScopeExports;
+  }
+
+  Error initialize(BinaryStreamReader Reader);
+  Error initialize(BinaryStreamRef Stream);
+
+  Iterator begin() const { return References.begin(); }
+  Iterator end() const { return References.end(); }
+
+private:
+  FixedStreamArray<CrossModuleExport> References;
+};
+
+class DebugCrossModuleExportsSubsection final : public DebugSubsection {
+public:
+  DebugCrossModuleExportsSubsection()
+      : DebugSubsection(DebugSubsectionKind::CrossScopeExports) {}
+
+  static bool classof(const DebugSubsection *S) {
+    return S->kind() == DebugSubsectionKind::CrossScopeExports;
+  }
+
+  void addMapping(uint32_t Local, uint32_t Global);
+
+  uint32_t calculateSerializedSize() const override;
+  Error commit(BinaryStreamWriter &Writer) const override;
+
+private:
+  std::map<uint32_t, uint32_t> Mappings;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
new file mode 100644
index 000000000000..ea3a9a43d50b
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
@@ -0,0 +1,88 @@
+//===- DebugCrossImpSubsection.h --------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace codeview {
+
+struct CrossModuleImportItem {
+  const CrossModuleImport *Header = nullptr;
+  llvm::FixedStreamArray<support::ulittle32_t> Imports;
+};
+}
+}
+
+namespace llvm {
+template <> struct VarStreamArrayExtractor<codeview::CrossModuleImportItem> {
+public:
+  typedef void ContextType;
+
+  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
+                   codeview::CrossModuleImportItem &Item);
+};
+}
+
+namespace llvm {
+namespace codeview {
+class DebugStringTableSubsection;
+
+class DebugCrossModuleImportsSubsectionRef final : public DebugSubsectionRef {
+  typedef VarStreamArray<CrossModuleImportItem> ReferenceArray;
+  typedef ReferenceArray::Iterator Iterator;
+
+public:
+  DebugCrossModuleImportsSubsectionRef()
+      : DebugSubsectionRef(DebugSubsectionKind::CrossScopeImports) {}
+
+  static bool classof(const DebugSubsectionRef *S) {
+    return S->kind() == DebugSubsectionKind::CrossScopeImports;
+  }
+
+  Error initialize(BinaryStreamReader Reader);
+  Error initialize(BinaryStreamRef Stream);
+
+  Iterator begin() const { return References.begin(); }
+  Iterator end() const { return References.end(); }
+
+private:
+  ReferenceArray References;
+};
+
+class DebugCrossModuleImportsSubsection final : public DebugSubsection {
+public:
+  explicit DebugCrossModuleImportsSubsection(
+      DebugStringTableSubsection &Strings)
+      : DebugSubsection(DebugSubsectionKind::CrossScopeImports),
+        Strings(Strings) {}
+
+  static bool classof(const DebugSubsection *S) {
+    return S->kind() == DebugSubsectionKind::CrossScopeImports;
+  }
+
+  void addImport(StringRef Module, uint32_t ImportId);
+
+  uint32_t calculateSerializedSize() const override;
+  Error commit(BinaryStreamWriter &Writer) const override;
+
+private:
+  DebugStringTableSubsection &Strings;
+  StringMap<std::vector<support::ulittle32_t>> Mappings;
+};
+}
+}
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index 60440700c265..c9b062717baa 100644
--- a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -43,10 +43,9 @@ struct InlineeSourceLine {
 }
 
 template <> struct VarStreamArrayExtractor<codeview::InlineeSourceLine> {
-  typedef bool ContextType;
-
-  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       codeview::InlineeSourceLine &Item, bool HasExtraFiles);
+  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
+                   codeview::InlineeSourceLine &Item);
+  bool HasExtraFiles = false;
 };
 
 namespace codeview {
diff --git a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
index 1b63af59c2ed..f1feb1336cc5 100644
--- a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
@@ -64,10 +64,10 @@ struct LineColumnEntry {
 
 class LineColumnExtractor {
 public:
-  typedef const LineFragmentHeader *ContextType;
+  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
+                   LineColumnEntry &Item);
 
-  static Error extract(BinaryStreamRef Stream, uint32_t &Len,
-                       LineColumnEntry &Item, const LineFragmentHeader *Ctx);
+  const LineFragmentHeader *Header = nullptr;
 };
 
 class DebugLinesSubsectionRef final : public DebugSubsectionRef {
@@ -122,7 +122,7 @@ public:
   uint32_t calculateSerializedSize() const override;
   Error commit(BinaryStreamWriter &Writer) const override;
 
-  void setRelocationAddress(uint16_t Segment, uint16_t Offset);
+  void setRelocationAddress(uint16_t Segment, uint32_t Offset);
   void setCodeSize(uint32_t Size);
   void setFlags(LineFlags Flags);
 
@@ -131,7 +131,7 @@ public:
 private:
   DebugChecksumsSubsection &Checksums;
 
-  uint16_t RelocOffset = 0;
+  uint32_t RelocOffset = 0;
   uint16_t RelocSegment = 0;
   uint32_t CodeSize = 0;
   LineFlags Flags = LF_None;
diff --git a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
index fbe39cb16f09..be0a2344965b 100644
--- a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
@@ -39,11 +39,14 @@ public:
   }
 
   Error initialize(BinaryStreamRef Contents);
+  Error initialize(BinaryStreamReader &Reader);
 
   Expected<StringRef> getString(uint32_t Offset) const;
 
   bool valid() const { return Stream.valid(); }
 
+  BinaryStreamRef getBuffer() const { return Stream; }
+
 private:
   BinaryStreamRef Stream;
 };
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
index 847259c5ceac..49a269d92e35 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
@@ -52,7 +52,7 @@ public:
   DebugSubsectionRecordBuilder(std::unique_ptr<DebugSubsection> Subsection,
                                CodeViewContainer Container);
   uint32_t calculateSerializedLength();
-  Error commit(BinaryStreamWriter &Writer);
+  Error commit(BinaryStreamWriter &Writer) const;
 
 private:
   std::unique_ptr<DebugSubsection> Subsection;
@@ -62,18 +62,12 @@ private:
 } // namespace codeview
 
 template <> struct VarStreamArrayExtractor<codeview::DebugSubsectionRecord> {
-  typedef void ContextType;
-
-  static Error extract(BinaryStreamRef Stream, uint32_t &Length,
-                       codeview::DebugSubsectionRecord &Info) {
-    // FIXME: We need to pass the container type through to this function, but
-    // VarStreamArray doesn't easily support stateful contexts.  In practice
-    // this isn't super important since the subsection header describes its
-    // length and we can just skip it.  It's more important when writing.
+  Error operator()(BinaryStreamRef Stream, uint32_t &Length,
+                   codeview::DebugSubsectionRecord &Info) {
     if (auto EC = codeview::DebugSubsectionRecord::initialize(
             Stream, Info, codeview::CodeViewContainer::Pdb))
       return EC;
-    Length = Info.getRecordLength();
+    Length = alignTo(Info.getRecordLength(), 4);
     return Error::success();
   }
 };
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
index 55bef491c97e..d4a3d9195a36 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
@@ -10,6 +10,8 @@
 #ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H
 #define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H
 
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
 #include "llvm/Support/Error.h"
 #include <cstdint>
 
@@ -20,9 +22,65 @@ namespace codeview {
 class DebugChecksumsSubsectionRef;
 class DebugSubsectionRecord;
 class DebugInlineeLinesSubsectionRef;
+class DebugCrossModuleExportsSubsectionRef;
+class DebugCrossModuleImportsSubsectionRef;
+class DebugFrameDataSubsectionRef;
 class DebugLinesSubsectionRef;
+class DebugStringTableSubsectionRef;
+class DebugSymbolRVASubsectionRef;
+class DebugSymbolsSubsectionRef;
 class DebugUnknownSubsectionRef;
 
+struct DebugSubsectionState {
+public:
+  // If no subsections are known about initially, we find as much as we can.
+  DebugSubsectionState();
+
+  // If only a string table subsection is given, we find a checksums subsection.
+  explicit DebugSubsectionState(const DebugStringTableSubsectionRef &Strings);
+
+  // If both subsections are given, we don't need to find anything.
+  DebugSubsectionState(const DebugStringTableSubsectionRef &Strings,
+                       const DebugChecksumsSubsectionRef &Checksums);
+
+  template <typename T> void initialize(T &&FragmentRange) {
+    for (const DebugSubsectionRecord &R : FragmentRange) {
+      if (Strings && Checksums)
+        return;
+      if (R.kind() == DebugSubsectionKind::FileChecksums) {
+        initializeChecksums(R);
+        continue;
+      }
+      if (R.kind() == DebugSubsectionKind::StringTable && !Strings) {
+        // While in practice we should never encounter a string table even
+        // though the string table is already initialized, in theory it's
+        // possible.  PDBs are supposed to have one global string table and
+        // then this subsection should not appear.  Whereas object files are
+        // supposed to have this subsection appear exactly once.  However,
+        // for testing purposes it's nice to be able to test this subsection
+        // independently of one format or the other, so for some tests we
+        // manually construct a PDB that contains this subsection in addition
+        // to a global string table.
+        initializeStrings(R);
+        continue;
+      }
+    }
+  }
+
+  const DebugStringTableSubsectionRef &strings() const { return *Strings; }
+  const DebugChecksumsSubsectionRef &checksums() const { return *Checksums; }
+
+private:
+  void initializeStrings(const DebugSubsectionRecord &SR);
+  void initializeChecksums(const DebugSubsectionRecord &FCR);
+
+  std::unique_ptr<DebugStringTableSubsectionRef> OwnedStrings;
+  std::unique_ptr<DebugChecksumsSubsectionRef> OwnedChecksums;
+
+  const DebugStringTableSubsectionRef *Strings = nullptr;
+  const DebugChecksumsSubsectionRef *Checksums = nullptr;
+};
+
 class DebugSubsectionVisitor {
 public:
   virtual ~DebugSubsectionVisitor() = default;
@@ -30,33 +88,71 @@ public:
   virtual Error visitUnknown(DebugUnknownSubsectionRef &Unknown) {
     return Error::success();
   }
-  virtual Error visitLines(DebugLinesSubsectionRef &Lines) {
-    return Error::success();
-  }
+  virtual Error visitLines(DebugLinesSubsectionRef &Lines,
+                           const DebugSubsectionState &State) = 0;
+  virtual Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums,
+                                   const DebugSubsectionState &State) = 0;
+  virtual Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees,
+                                  const DebugSubsectionState &State) = 0;
+  virtual Error
+  visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &CSE,
+                          const DebugSubsectionState &State) = 0;
+  virtual Error
+  visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &CSE,
+                          const DebugSubsectionState &State) = 0;
 
-  virtual Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) {
-    return Error::success();
-  }
+  virtual Error visitStringTable(DebugStringTableSubsectionRef &ST,
+                                 const DebugSubsectionState &State) = 0;
 
-  virtual Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) {
-    return Error::success();
-  }
+  virtual Error visitSymbols(DebugSymbolsSubsectionRef &CSE,
+                             const DebugSubsectionState &State) = 0;
 
-  virtual Error finished() { return Error::success(); }
+  virtual Error visitFrameData(DebugFrameDataSubsectionRef &FD,
+                               const DebugSubsectionState &State) = 0;
+  virtual Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &RVAs,
+                                    const DebugSubsectionState &State) = 0;
 };
 
 Error visitDebugSubsection(const DebugSubsectionRecord &R,
-                           DebugSubsectionVisitor &V);
+                           DebugSubsectionVisitor &V,
+                           const DebugSubsectionState &State);
+
+namespace detail {
+template <typename T>
+Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V,
+                            DebugSubsectionState &State) {
+  State.initialize(std::forward<T>(FragmentRange));
+
+  for (const DebugSubsectionRecord &L : FragmentRange) {
+    if (auto EC = visitDebugSubsection(L, V, State))
+      return EC;
+  }
+  return Error::success();
+}
+} // namespace detail
 
 template <typename T>
 Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V) {
-  for (const auto &L : FragmentRange) {
-    if (auto EC = visitDebugSubsection(L, V))
-      return EC;
-  }
-  if (auto EC = V.finished())
-    return EC;
-  return Error::success();
+  DebugSubsectionState State;
+  return detail::visitDebugSubsections(std::forward<T>(FragmentRange), V,
+                                       State);
+}
+
+template <typename T>
+Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V,
+                            const DebugStringTableSubsectionRef &Strings) {
+  DebugSubsectionState State(Strings);
+  return detail::visitDebugSubsections(std::forward<T>(FragmentRange), V,
+                                       State);
+}
+
+template <typename T>
+Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V,
+                            const DebugStringTableSubsectionRef &Strings,
+                            const DebugChecksumsSubsectionRef &Checksums) {
+  DebugSubsectionState State(Strings, Checksums);
+  return detail::visitDebugSubsections(std::forward<T>(FragmentRange), V,
+                                       State);
 }
 
 } // end namespace codeview
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
new file mode 100644
index 000000000000..ad58a293cb09
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
@@ -0,0 +1,59 @@
+//===- DebugSymbolRVASubsection.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H
+#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H
+
+#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+
+class DebugSymbolRVASubsectionRef final : public DebugSubsectionRef {
+public:
+  typedef FixedStreamArray<support::ulittle32_t> ArrayType;
+
+  DebugSymbolRVASubsectionRef();
+
+  static bool classof(const DebugSubsectionRef *S) {
+    return S->kind() == DebugSubsectionKind::CoffSymbolRVA;
+  }
+
+  ArrayType::Iterator begin() const { return RVAs.begin(); }
+  ArrayType::Iterator end() const { return RVAs.end(); }
+
+  Error initialize(BinaryStreamReader &Reader);
+
+private:
+  ArrayType RVAs;
+};
+
+class DebugSymbolRVASubsection final : public DebugSubsection {
+public:
+  DebugSymbolRVASubsection();
+
+  static bool classof(const DebugSubsection *S) {
+    return S->kind() == DebugSubsectionKind::CoffSymbolRVA;
+  }
+
+  Error commit(BinaryStreamWriter &Writer) const override;
+  uint32_t calculateSerializedSize() const override;
+
+  void addRVA(uint32_t RVA) { RVAs.push_back(support::ulittle32_t(RVA)); }
+
+private:
+  std::vector<support::ulittle32_t> RVAs;
+};
+} // namespace codeview
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
index 3d1eb27ba270..dfda7deb6cb4 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
@@ -27,6 +27,9 @@ public:
 
   Error initialize(BinaryStreamReader Reader);
 
+  CVSymbolArray::Iterator begin() const { return Records.begin(); }
+  CVSymbolArray::Iterator end() const { return Records.end(); }
+
 private:
   CVSymbolArray Records;
 };
diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h
index 10d1c581a196..013e440613fc 100644
--- a/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -11,8 +11,8 @@
 #define LLVM_DEBUGINFO_CODEVIEW_ENUMTABLES_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/ScopedPrinter.h"
 
 #include <stdint.h>
diff --git a/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/include/llvm/DebugInfo/CodeView/TypeSerializer.h
index 1dee86a1da79..f785d4509547 100644
--- a/include/llvm/DebugInfo/CodeView/TypeSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/TypeSerializer.h
@@ -86,6 +86,8 @@ public:
 
   void reset();
 
+  BumpPtrAllocator &getAllocator() { return RecordStorage; }
+
   ArrayRef<ArrayRef<uint8_t>> records() const;
   TypeIndex insertRecordBytes(ArrayRef<uint8_t> &Record);
   TypeIndex insertRecord(const RemappedType &Record);
diff --git a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
index 907ed1010e5b..1069dcd45334 100644
--- a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
@@ -37,8 +37,9 @@ private:
   TypeSerializer Serializer;
 
 public:
-  explicit TypeTableBuilder(BumpPtrAllocator &Allocator)
-      : Allocator(Allocator), Serializer(Allocator) {}
+  explicit TypeTableBuilder(BumpPtrAllocator &Allocator,
+                            bool WriteUnique = true)
+      : Allocator(Allocator), Serializer(Allocator, WriteUnique) {}
   TypeTableBuilder(const TypeTableBuilder &) = delete;
   TypeTableBuilder &operator=(const TypeTableBuilder &) = delete;
 
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index 2ab1c9508522..2e82a774cc23 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -135,6 +135,7 @@ enum DIDumpType {
   DIDT_GnuPubnames,
   DIDT_GnuPubtypes,
   DIDT_Str,
+  DIDT_StrOffsets,
   DIDT_StrDwo,
   DIDT_StrOffsetsDwo,
   DIDT_AppleNames,
@@ -152,6 +153,7 @@ struct DIDumpOptions {
     DIDumpType DumpType = DIDT_All;
     bool DumpEH = false;
     bool SummarizeTypes = false;
+    bool Brief = false;
 };
 
 class DIContext {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index 7324f6e3eb38..e363cff15803 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -10,11 +10,11 @@
 #ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
 #define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index f95a013d7552..72793e97b60d 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -11,9 +11,9 @@
 #define LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include <cstdint>
 #include <utility>
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
index c3953b62d780..f0672bb0ca75 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H
 #define LLVM_DEBUGINFO_DWARFATTRIBUTE_H
 
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Support/Dwarf.h"
 #include <cstdint>
 
 namespace llvm {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index 46c0b7f4ce60..b4e4721e3d51 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -19,8 +19,8 @@ class DWARFCompileUnit : public DWARFUnit {
 public:
   DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section,
                    const DWARFDebugAbbrev *DA, const DWARFSection *RS,
-                   StringRef SS, StringRef SOS, const DWARFSection *AOS,
-                   StringRef LS, bool LE, bool IsDWO,
+                   StringRef SS, const DWARFSection &SOS,
+                   const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO,
                    const DWARFUnitSectionBase &UnitSection,
                    const DWARFUnitIndex::Entry *Entry)
       : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO,
@@ -29,7 +29,7 @@ public:
   // VTable anchor.
   ~DWARFCompileUnit() override;
 
-  void dump(raw_ostream &OS);
+  void dump(raw_ostream &OS, DIDumpOptions DumpOpts);
 
   static const DWARFSectionKind Section = DW_SECT_INFO;
 };
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 519ecf618558..c72604a12bfd 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -11,12 +11,12 @@
 #define LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
@@ -72,6 +72,9 @@ class DWARFContext : public DIContext {
   std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO;
   std::unique_ptr<DWARFDebugLocDWO> LocDWO;
 
+  /// The maximum DWARF version of all units.
+  unsigned MaxVersion;
+
   struct DWOFile {
     object::OwningBinary<object::ObjectFile> File;
     std::unique_ptr<DWARFContext> Context;
@@ -97,7 +100,7 @@ class DWARFContext : public DIContext {
   void parseDWOTypeUnits();
 
 public:
-  DWARFContext() : DIContext(CK_DWARF) {}
+  DWARFContext() : DIContext(CK_DWARF), MaxVersion(0) {}
   DWARFContext(DWARFContext &) = delete;
   DWARFContext &operator=(DWARFContext &) = delete;
 
@@ -178,6 +181,13 @@ public:
   /// Get a DIE given an exact offset.
   DWARFDie getDIEForOffset(uint32_t Offset);
 
+  unsigned getMaxVersion() const { return MaxVersion; }
+
+  void setMaxVersionIfGreater(unsigned Version) {
+    if (Version > MaxVersion)
+      MaxVersion = Version;
+  }
+
   const DWARFUnitIndex &getCUIndex();
   DWARFGdbIndex &getGdbIndex();
   const DWARFUnitIndex &getTUIndex();
@@ -237,6 +247,11 @@ public:
   virtual StringRef getGnuPubNamesSection() = 0;
   virtual StringRef getGnuPubTypesSection() = 0;
 
+  /// DWARF v5
+  /// @{
+  virtual const DWARFSection &getStringOffsetSection() = 0;
+  /// @}
+
   // Sections for DWARF5 split dwarf proposal.
   virtual const DWARFSection &getInfoDWOSection() = 0;
   virtual const TypeSectionMap &getTypesDWOSections() = 0;
@@ -244,7 +259,7 @@ public:
   virtual const DWARFSection &getLineDWOSection() = 0;
   virtual const DWARFSection &getLocDWOSection() = 0;
   virtual StringRef getStringDWOSection() = 0;
-  virtual StringRef getStringOffsetDWOSection() = 0;
+  virtual const DWARFSection &getStringOffsetDWOSection() = 0;
   virtual const DWARFSection &getRangeDWOSection() = 0;
   virtual const DWARFSection &getAddrSection() = 0;
   virtual const DWARFSection& getAppleNamesSection() = 0;
@@ -295,6 +310,11 @@ class DWARFContextInMemory : public DWARFContext {
   StringRef GnuPubNamesSection;
   StringRef GnuPubTypesSection;
 
+  /// DWARF v5
+  /// @{
+  DWARFSection StringOffsetSection;
+  /// @}
+
   // Sections for DWARF5 split dwarf proposal.
   DWARFSection InfoDWOSection;
   TypeSectionMap TypesDWOSections;
@@ -302,7 +322,7 @@ class DWARFContextInMemory : public DWARFContext {
   DWARFSection LineDWOSection;
   DWARFSection LocDWOSection;
   StringRef StringDWOSection;
-  StringRef StringOffsetDWOSection;
+  DWARFSection StringOffsetDWOSection;
   DWARFSection RangeDWOSection;
   DWARFSection AddrSection;
   DWARFSection AppleNamesSection;
@@ -353,6 +373,11 @@ public:
   const DWARFSection& getAppleNamespacesSection() override { return AppleNamespacesSection; }
   const DWARFSection& getAppleObjCSection() override { return AppleObjCSection; }
 
+  // DWARF v5
+  const DWARFSection &getStringOffsetSection() override {
+    return StringOffsetSection;
+  }
+
   // Sections for DWARF5 split dwarf proposal.
   const DWARFSection &getInfoDWOSection() override { return InfoDWOSection; }
 
@@ -365,7 +390,7 @@ public:
   const DWARFSection &getLocDWOSection() override { return LocDWOSection; }
   StringRef getStringDWOSection() override { return StringDWOSection; }
 
-  StringRef getStringOffsetDWOSection() override {
+  const DWARFSection &getStringOffsetDWOSection() override {
     return StringOffsetDWOSection;
   }
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index fc2423a2708b..5c591b3de491 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
 #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
 
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
-#include "llvm/Support/Dwarf.h"
 #include <cstdint>
 
 namespace llvm {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
index 9d36bb7ad211..a309fd104f93 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -12,7 +12,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include <cstdint>
 #include <vector>
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index b436711ae6ed..437060bc8fec 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H
 #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H
 
-#include "llvm/Support/DataExtractor.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/Support/DataExtractor.h"
 
 #include <cassert>
 #include <cstdint>
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h
index fa41b9e293c0..b216491b615a 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -11,14 +11,14 @@
 #define LLVM_DEBUGINFO_DWARFDIE_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/Optional.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFAttribute.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
-#include "llvm/Support/Dwarf.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
@@ -120,7 +120,8 @@ public:
   /// \param recurseDepth the depth to recurse to when dumping this DIE and its
   /// children.
   /// \param indent the number of characters to indent each line that is output.
-  void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0) const;
+  void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0,
+            DIDumpOptions DumpOpts = DIDumpOptions()) const;
 
   /// Extract the specified attribute from this DIE.
   ///
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 3a781dde8929..d6a3b52f2fe1 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -13,8 +13,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include <cstdint>
 
 namespace llvm {
diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index c77d946c070a..2041d40eb53a 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -31,7 +31,7 @@ private:
 public:
   DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section,
                 const DWARFDebugAbbrev *DA, const DWARFSection *RS,
-                StringRef SS, StringRef SOS, const DWARFSection *AOS,
+                StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
                 StringRef LS, bool LE, bool IsDWO,
                 const DWARFUnitSectionBase &UnitSection,
                 const DWARFUnitIndex::Entry *Entry)
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index d0f7bd0d623f..945b8999ff22 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -10,11 +10,12 @@
 #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
 #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
@@ -24,14 +25,13 @@
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
+#include <map>
 #include <memory>
 #include <vector>
-#include <map>
 
 namespace llvm {
 
@@ -57,8 +57,9 @@ protected:
 
   virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section,
                          const DWARFDebugAbbrev *DA, const DWARFSection *RS,
-                         StringRef SS, StringRef SOS, const DWARFSection *AOS,
-                         StringRef LS, bool isLittleEndian, bool isDWO) = 0;
+                         StringRef SS, const DWARFSection &SOS,
+                         const DWARFSection *AOS, StringRef LS,
+                         bool isLittleEndian, bool isDWO) = 0;
 };
 
 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
@@ -89,7 +90,7 @@ public:
 private:
   void parseImpl(DWARFContext &Context, const DWARFSection &Section,
                  const DWARFDebugAbbrev *DA, const DWARFSection *RS,
-                 StringRef SS, StringRef SOS, const DWARFSection *AOS,
+                 StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS,
                  StringRef LS, bool LE, bool IsDWO) override {
     if (Parsed)
       return;
@@ -119,7 +120,8 @@ class DWARFUnit {
   uint32_t RangeSectionBase;
   StringRef LineSection;
   StringRef StringSection;
-  StringRef StringOffsetSection;
+  const DWARFSection &StringOffsetSection;
+  uint64_t StringOffsetSectionBase = 0;
   const DWARFSection *AddrOffsetSection;
   uint32_t AddrOffsetSectionBase;
   bool isLittleEndian;
@@ -162,8 +164,8 @@ protected:
 public:
   DWARFUnit(DWARFContext &Context, const DWARFSection &Section,
             const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS,
-            StringRef SOS, const DWARFSection *AOS, StringRef LS, bool LE,
-            bool IsDWO, const DWARFUnitSectionBase &UnitSection,
+            const DWARFSection &SOS, const DWARFSection *AOS, StringRef LS,
+            bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection,
             const DWARFUnitIndex::Entry *IndexEntry = nullptr);
 
   virtual ~DWARFUnit();
@@ -172,7 +174,9 @@ public:
 
   StringRef getLineSection() const { return LineSection; }
   StringRef getStringSection() const { return StringSection; }
-  StringRef getStringOffsetSection() const { return StringOffsetSection; }
+  const DWARFSection &getStringOffsetSection() const {
+    return StringOffsetSection;
+  }
 
   void setAddrOffsetSection(const DWARFSection *AOS, uint32_t Base) {
     AddrOffsetSection = AOS;
@@ -189,7 +193,8 @@ public:
 
   bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
   // FIXME: Result should be uint64_t in DWARF64.
-  bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const;
+  bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const;
+  uint64_t getStringOffsetSectionRelocation(uint32_t Index) const;
 
   DataExtractor getDebugInfoExtractor() const {
     return DataExtractor(InfoSection.Data, isLittleEndian, AddrSize);
@@ -200,6 +205,9 @@ public:
   }
 
   const RelocAddrMap *getRelocMap() const { return &InfoSection.Relocs; }
+  const RelocAddrMap &getStringOffsetsRelocMap() const {
+    return StringOffsetSection.Relocs;
+  }
 
   bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
 
diff --git a/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
index 36dce393fc66..02f3cb09b004 100644
--- a/include/llvm/DebugInfo/MSF/MappedBlockStream.h
+++ b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -17,7 +17,6 @@
 #include "llvm/DebugInfo/MSF/MSFStreamLayout.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/BinaryStream.h"
-#include "llvm/Support/BinaryStream.h"
 #include "llvm/Support/BinaryStreamRef.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
index 941e16a35fac..ffae6645e94b 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
@@ -11,6 +11,7 @@
 #define LLVM_DEBUGINFO_PDB_DIA_DIAENUMDEBUGSTREAMS_H
 
 #include "DIASupport.h"
+#include "llvm/DebugInfo/PDB/IPDBDataStream.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 
 namespace llvm {
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
index 106b84cecfff..08f0de124ede 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
@@ -12,6 +12,7 @@
 
 #include "DIASupport.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 
 namespace llvm {
 namespace pdb {
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
index 6c00d6a5e29d..e69d18f5ba37 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
@@ -12,6 +12,7 @@
 
 #include "DIASupport.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
 
 namespace llvm {
 namespace pdb {
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
index b206ff59a6a4..f779cd1f4be3 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
@@ -12,6 +12,7 @@
 
 #include "DIASupport.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 namespace llvm {
 namespace pdb {
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
index 7e77f5a3eef9..8200f51e3da9 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
@@ -56,9 +56,8 @@ private:
 } // end namespace pdb
 
 template <> struct VarStreamArrayExtractor<pdb::DbiModuleDescriptor> {
-  typedef void ContextType;
-  static Error extract(BinaryStreamRef Stream, uint32_t &Length,
-                       pdb::DbiModuleDescriptor &Info) {
+  Error operator()(BinaryStreamRef Stream, uint32_t &Length,
+                   pdb::DbiModuleDescriptor &Info) {
     if (auto EC = pdb::DbiModuleDescriptor::initialize(Stream, Info))
       return EC;
     Length = Info.getRecordLength();
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index dc35f8c72cd9..7123e88cd642 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -96,7 +96,8 @@ private:
 
   FixedStreamArray<support::ulittle16_t> DbgStreams;
 
-  PdbRaw_DbiSecContribVer SectionContribVersion;
+  PdbRaw_DbiSecContribVer SectionContribVersion =
+      PdbRaw_DbiSecContribVer::DbiSecContribVer60;
   FixedStreamArray<SectionContrib> SectionContribs;
   FixedStreamArray<SectionContrib2> SectionContribs2;
   FixedStreamArray<SecMapEntry> SectionMap;
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h
index b114b7afb0b0..9e883d2f99a7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -89,6 +89,8 @@ public:
 
   template <typename T> std::unique_ptr<T> findOneChild() const {
     auto Enumerator(findAllChildren<T>());
+    if (!Enumerator)
+      return nullptr;
     return Enumerator->getNext();
   }
 
@@ -97,6 +99,8 @@ public:
   template <typename T>
   std::unique_ptr<ConcreteSymbolEnumerator<T>> findAllChildren() const {
     auto BaseIter = RawSymbol->findChildren(T::Tag);
+    if (!BaseIter)
+      return nullptr;
     return llvm::make_unique<ConcreteSymbolEnumerator<T>>(std::move(BaseIter));
   }
   std::unique_ptr<IPDBEnumSymbols> findAllChildren(PDB_SymType Type) const;
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index f68337c43271..1586f7b80669 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -27,10 +27,10 @@
 #include "llvm/Support/Mutex.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include <functional>
 #include <map>
 #include <string>
 #include <vector>
-#include <functional>
 
 namespace llvm {
 
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 1bb911d09cfb..2fccf8a0f625 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -20,9 +20,9 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index a32278b8a81e..71d847c06264 100644
--- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H
 #define LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include <vector>
diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index f16dd021ea51..f81d054440fc 100644
--- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H
 #define LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H
 
-#include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/Object/ObjectFile.h"
 #include <memory>
 
diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 5b3426afe584..aabb44eef99d 100644
--- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -23,8 +23,8 @@
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
-#include <cassert>
 #include <algorithm>
+#include <cassert>
 #include <functional>
 #include <list>
 #include <memory>
diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
index 74535fe948ff..a9778514b9f1 100644
--- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
+++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
@@ -14,10 +14,10 @@
 #ifndef LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
 #define LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
 
+#include "llvm-c/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/Support/CBindingWrapping.h"
-#include "llvm-c/ExecutionEngine.h"
 #include <cstddef>
 #include <cstdint>
 #include <string>
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 687863857698..0e8adda82cbe 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -16,13 +16,13 @@
 #ifndef LLVM_IR_ATTRIBUTES_H
 #define LLVM_IR_ATTRIBUTES_H
 
+#include "llvm-c/Types.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
-#include "llvm-c/Types.h"
 #include <bitset>
 #include <cassert>
 #include <cstdint>
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index 235cb57cfd09..23f838b640e0 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -14,15 +14,15 @@
 #ifndef LLVM_IR_BASICBLOCK_H
 #define LLVM_IR_BASICBLOCK_H
 
+#include "llvm-c/Types.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm-c/Types.h"
 #include <cassert>
 #include <cstddef>
 
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index 4a806ab501e5..96fbebf42c38 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -26,9 +26,9 @@
 #ifndef LLVM_IR_CALLSITE_H
 #define LLVM_IR_CALLSITE_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
@@ -36,10 +36,10 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
 #include <cassert>
 #include <cstdint>
 #include <iterator>
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index 40a8d1eb27d0..bb5e1931393b 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -194,7 +194,7 @@ public:
   /// common code. It also correctly performs the comparison without the
   /// potential for an assertion from getZExtValue().
   bool isZero() const {
-    return Val == 0;
+    return Val.isNullValue();
   }
 
   /// This is just a convenience method to make client code smaller for a
@@ -202,7 +202,7 @@ public:
   /// potential for an assertion from getZExtValue().
   /// @brief Determine if the value is one.
   bool isOne() const {
-    return Val == 1;
+    return Val.isOneValue();
   }
 
   /// This function will return true iff every bit in this constant is set
@@ -243,7 +243,7 @@ public:
   /// @returns true iff this constant is greater or equal to the given number.
   /// @brief Determine if the value is greater or equal to the given number.
   bool uge(uint64_t Num) const {
-    return Val.getActiveBits() > 64 || Val.getZExtValue() >= Num;
+    return Val.uge(Num);
   }
 
   /// getLimitedValue - If the value is smaller than the specified limit,
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index c1d398f17b59..daf8f8da689d 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -21,8 +21,8 @@
 #define LLVM_IR_DATALAYOUT_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Type.h"
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index 358106aac43b..2174e1f301ee 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -16,15 +16,15 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/Dwarf.h"
 #include <cassert>
 #include <climits>
 #include <cstddef>
@@ -148,7 +148,7 @@ public:
 /// Tagged DWARF-like metadata node.
 ///
 /// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*,
-/// defined in llvm/Support/Dwarf.h).  Called \a DINode because it's
+/// defined in llvm/BinaryFormat/Dwarf.h).  Called \a DINode because it's
 /// potentially used for non-DWARF output.
 class DINode : public MDNode {
   friend class LLVMContextImpl;
@@ -2642,7 +2642,8 @@ public:
 /// Macro Info DWARF-like metadata node.
 ///
 /// A metadata node with a DWARF macro info (i.e., a constant named
-/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h).  Called \a DIMacroNode
+/// \c DW_MACINFO_*, defined in llvm/BinaryFormat/Dwarf.h).  Called \a
+/// DIMacroNode
 /// because it's potentially used for non-DWARF output.
 class DIMacroNode : public MDNode {
   friend class LLVMContextImpl;
diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h
index 5497652135bd..15d332577113 100644
--- a/include/llvm/IR/DiagnosticInfo.h
+++ b/include/llvm/IR/DiagnosticInfo.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_IR_DIAGNOSTICINFO_H
 #define LLVM_IR_DIAGNOSTICINFO_H
 
+#include "llvm-c/Types.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -22,10 +23,9 @@
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/YAMLTraits.h"
-#include "llvm-c/Types.h"
-#include <functional>
 #include <algorithm>
 #include <cstdint>
+#include <functional>
 #include <iterator>
 #include <string>
 
diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h
index def91e73eb1d..9be6acc33591 100644
--- a/include/llvm/IR/Dominators.h
+++ b/include/llvm/IR/Dominators.h
@@ -66,6 +66,7 @@ public:
     return End;
   }
 
+  /// Check if this is the only edge between Start and End.
   bool isSingleEdge() const;
 };
 
@@ -143,6 +144,11 @@ public:
   bool dominates(const Instruction *Def, const Use &U) const;
   bool dominates(const Instruction *Def, const Instruction *User) const;
   bool dominates(const Instruction *Def, const BasicBlock *BB) const;
+
+  /// Return true if an edge dominates a use.
+  ///
+  /// If BBE is not a unique edge between start and end of the edge, it can
+  /// never dominate the use.
   bool dominates(const BasicBlockEdge &BBE, const Use &U) const;
   bool dominates(const BasicBlockEdge &BBE, const BasicBlock *BB) const;
 
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 29f512ddd076..3496806d9362 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -19,10 +19,10 @@
 #define LLVM_IR_FUNCTION_H
 
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
diff --git a/include/llvm/IR/GetElementPtrTypeIterator.h b/include/llvm/IR/GetElementPtrTypeIterator.h
index f017a449d33f..3c143ea5f703 100644
--- a/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -21,9 +21,9 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/User.h"
 #include "llvm/Support/Casting.h"
-#include <cassert> 
+#include <cassert>
 #include <cstddef>
-#include <cstdint> 
+#include <cstdint>
 #include <iterator>
 
 namespace llvm {
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
index 20495725f9d0..d65d43cc5957 100644
--- a/include/llvm/IR/GlobalValue.h
+++ b/include/llvm/IR/GlobalValue.h
@@ -23,9 +23,9 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Support/MD5.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
 #include <cassert>
 #include <cstdint>
 #include <string>
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index 3f5d00bd3b3a..454492769c8b 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -23,8 +23,8 @@
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/ilist_node.h"
-#include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/OperandTraits.h"
 #include "llvm/IR/Value.h"
 #include <cassert>
@@ -235,6 +235,13 @@ public:
     Attrs = A;
   }
 
+  /// Check if section name is present
+  bool hasImplicitSection() const {
+    return getAttributes().hasAttribute("bss-section") ||
+           getAttributes().hasAttribute("data-section") ||
+           getAttributes().hasAttribute("rodata-section");
+  }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const Value *V) {
     return V->getValueID() == Value::GlobalVariableVal;
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index 9d4c13c29f68..5ddaf2b1733b 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_IR_IRBUILDER_H
 #define LLVM_IR_IRBUILDER_H
 
+#include "llvm-c/Types.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/StringRef.h"
@@ -41,11 +42,10 @@
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Casting.h"
-#include "llvm-c/Types.h"
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <algorithm>
 #include <functional>
 
 namespace llvm {
@@ -435,6 +435,28 @@ public:
                          MDNode *ScopeTag = nullptr,
                          MDNode *NoAliasTag = nullptr);
 
+  /// \brief Create and insert an atomic memcpy between the specified
+  /// pointers.
+  ///
+  /// If the pointers aren't i8*, they will be converted.  If a TBAA tag is
+  /// specified, it will be added to the instruction. Likewise with alias.scope
+  /// and noalias tags.
+  CallInst *CreateElementAtomicMemCpy(
+      Value *Dst, Value *Src, uint64_t NumElements, uint32_t ElementSize,
+      MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr,
+      MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) {
+    return CreateElementAtomicMemCpy(Dst, Src, getInt64(NumElements),
+                                     ElementSize, TBAATag, TBAAStructTag,
+                                     ScopeTag, NoAliasTag);
+  }
+
+  CallInst *CreateElementAtomicMemCpy(Value *Dst, Value *Src,
+                                      Value *NumElements, uint32_t ElementSize,
+                                      MDNode *TBAATag = nullptr,
+                                      MDNode *TBAAStructTag = nullptr,
+                                      MDNode *ScopeTag = nullptr,
+                                      MDNode *NoAliasTag = nullptr);
+
   /// \brief Create and insert a memmove between the specified
   /// pointers.
   ///
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index e850c015d711..ff63da50afee 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -17,13 +17,13 @@
 #define LLVM_IR_INSTRTYPES_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instruction.h"
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index 6e109735ddd3..00c431834e31 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -16,9 +16,9 @@
 #define LLVM_IR_INSTRUCTION_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist_node.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/IR/User.h"
@@ -360,9 +360,9 @@ public:
   /// Copy I's fast-math flags
   void copyFastMathFlags(const Instruction *I);
 
-  /// Convenience method to copy supported wrapping, exact, and fast-math flags
-  /// from V to this instruction.
-  void copyIRFlags(const Value *V);
+  /// Convenience method to copy supported exact, fast-math, and (optionally)
+  /// wrapping flags from V to this instruction.
+  void copyIRFlags(const Value *V, bool IncludeWrapFlags = true);
 
   /// Logical 'and' of any supported wrapping, exact, and fast-math flags of
   /// V and this instruction.
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 1f7990b99ebe..6029b0a7c571 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -17,13 +17,13 @@
 #define LLVM_IR_INSTRUCTIONS_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/iterator.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/None.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallingConv.h"
@@ -2234,6 +2234,8 @@ public:
     return User::operator new(s, 3);
   }
 
+  void *operator new(size_t, unsigned) = delete;
+
   /// Return true if a shufflevector instruction can be
   /// formed with the specified operands.
   static bool isValidOperands(const Value *V1, const Value *V2,
@@ -2331,9 +2333,6 @@ class ExtractValueInst : public UnaryInstruction {
                           ArrayRef<unsigned> Idxs,
                           const Twine &NameStr, BasicBlock *InsertAtEnd);
 
-  // allocate space for exactly one operand
-  void *operator new(size_t s) { return User::operator new(s, 1); }
-
   void init(ArrayRef<unsigned> Idxs, const Twine &NameStr);
 
 protected:
@@ -2579,7 +2578,6 @@ class PHINode : public Instruction {
   unsigned ReservedSpace;
 
   PHINode(const PHINode &PN);
-  // allocate space for exactly zero operands
 
   explicit PHINode(Type *Ty, unsigned NumReservedValues,
                    const Twine &NameStr = "",
@@ -2598,6 +2596,7 @@ class PHINode : public Instruction {
     allocHungoffUses(ReservedSpace);
   }
 
+  // allocate space for exactly zero operands
   void *operator new(size_t s) {
     return User::operator new(s);
   }
@@ -2970,9 +2969,13 @@ public:
 private:
   friend TerminatorInst;
 
-  BasicBlock *getSuccessorV(unsigned idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned idx, BasicBlock *B);
+  BasicBlock *getSuccessor(unsigned idx) const {
+    llvm_unreachable("ReturnInst has no successors!");
+  }
+
+  void setSuccessor(unsigned idx, BasicBlock *B) {
+    llvm_unreachable("ReturnInst has no successors!");
+  }
 };
 
 template <>
@@ -3078,13 +3081,6 @@ public:
   static inline bool classof(const Value *V) {
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
-
-private:
-  friend TerminatorInst;
-
-  BasicBlock *getSuccessorV(unsigned idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned idx, BasicBlock *B);
 };
 
 template <>
@@ -3444,13 +3440,6 @@ public:
   static inline bool classof(const Value *V) {
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
-
-private:
-  friend TerminatorInst;
-
-  BasicBlock *getSuccessorV(unsigned idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned idx, BasicBlock *B);
 };
 
 template <>
@@ -3551,13 +3540,6 @@ public:
   static inline bool classof(const Value *V) {
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
-
-private:
-  friend TerminatorInst;
-
-  BasicBlock *getSuccessorV(unsigned idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned idx, BasicBlock *B);
 };
 
 template <>
@@ -4036,12 +4018,6 @@ public:
   }
 
 private:
-  friend TerminatorInst;
-
-  BasicBlock *getSuccessorV(unsigned idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned idx, BasicBlock *B);
-
   template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
     if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind))
       return true;
@@ -4139,9 +4115,13 @@ public:
 private:
   friend TerminatorInst;
 
-  BasicBlock *getSuccessorV(unsigned idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned idx, BasicBlock *B);
+  BasicBlock *getSuccessor(unsigned idx) const {
+    llvm_unreachable("ResumeInst has no successors!");
+  }
+
+  void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+    llvm_unreachable("ResumeInst has no successors!");
+  }
 };
 
 template <>
@@ -4321,13 +4301,6 @@ public:
   static inline bool classof(const Value *V) {
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
-
-private:
-  friend TerminatorInst;
-
-  BasicBlock *getSuccessorV(unsigned Idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned Idx, BasicBlock *B);
 };
 
 template <>
@@ -4492,9 +4465,15 @@ public:
 private:
   friend TerminatorInst;
 
-  BasicBlock *getSuccessorV(unsigned Idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned Idx, BasicBlock *B);
+  BasicBlock *getSuccessor(unsigned Idx) const {
+    assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
+    return getSuccessor();
+  }
+
+  void setSuccessor(unsigned Idx, BasicBlock *B) {
+    assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
+    setSuccessor(B);
+  }
 };
 
 template <>
@@ -4582,9 +4561,15 @@ public:
 private:
   friend TerminatorInst;
 
-  BasicBlock *getSuccessorV(unsigned Idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned Idx, BasicBlock *B);
+  BasicBlock *getSuccessor(unsigned Idx) const {
+    assert(Idx == 0);
+    return getUnwindDest();
+  }
+
+  void setSuccessor(unsigned Idx, BasicBlock *B) {
+    assert(Idx == 0);
+    setUnwindDest(B);
+  }
 
   // Shadow Instruction::setInstructionSubclassData with a private forwarding
   // method so that subclasses cannot accidentally use it.
@@ -4639,9 +4624,13 @@ public:
 private:
   friend TerminatorInst;
 
-  BasicBlock *getSuccessorV(unsigned idx) const;
-  unsigned getNumSuccessorsV() const;
-  void setSuccessorV(unsigned idx, BasicBlock *B);
+  BasicBlock *getSuccessor(unsigned idx) const {
+    llvm_unreachable("UnreachableInst has no successors!");
+  }
+
+  void setSuccessor(unsigned idx, BasicBlock *B) {
+    llvm_unreachable("UnreachableInst has no successors!");
+  }
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index e1928546607a..8017223c4ab0 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -703,6 +703,16 @@ def int_amdgcn_readlane :
   GCCBuiltin<"__builtin_amdgcn_readlane">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
 
+def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
+  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty],
+  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
 //===----------------------------------------------------------------------===//
 // CI+ Intrinsics
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index 3c753260190e..d538c2595393 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -19,18 +19,18 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/PointerUnion.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstddef>
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index 5e1f680c5b36..d47d82a57bff 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -15,10 +15,11 @@
 #ifndef LLVM_IR_MODULE_H
 #define LLVM_IR_MODULE_H
 
-#include "llvm/ADT/iterator_range.h"
+#include "llvm-c/Types.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Comdat.h"
 #include "llvm/IR/DataLayout.h"
@@ -30,7 +31,6 @@
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/CodeGen.h"
-#include "llvm-c/Types.h"
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index 757ddf6cf46b..144e45f18d2c 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -18,13 +18,13 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
 #include <algorithm>
 #include <array>
 #include <cassert>
diff --git a/include/llvm/IR/OperandTraits.h b/include/llvm/IR/OperandTraits.h
index 7b94283856b6..c618aff3df9a 100644
--- a/include/llvm/IR/OperandTraits.h
+++ b/include/llvm/IR/OperandTraits.h
@@ -88,9 +88,6 @@ struct VariadicOperandTraits {
 /// HungoffOperandTraits - determine the allocation regime of the Use array
 /// when it is not a prefix to the User object, but allocated at an unrelated
 /// heap address.
-/// Assumes that the User subclass that is determined by this traits class
-/// has an OperandList member of type User::op_iterator. [Note: this is now
-/// trivially satisfied, because User has that member for historic reasons.]
 ///
 /// This is the traits class that is needed when the Use array must be
 /// resizable.
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index 072c6c5ece83..542570aaaa24 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -262,7 +262,7 @@ template <typename Predicate> struct api_pred_ty : public Predicate {
 };
 
 struct is_one {
-  bool isValue(const APInt &C) { return C == 1; }
+  bool isValue(const APInt &C) { return C.isOneValue(); }
 };
 
 /// \brief Match an integer 1 or a vector with all elements equal to 1.
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index a5f0130f79f4..265e7eb348bf 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -17,8 +17,8 @@
 #ifndef LLVM_IR_STATEPOINT_H
 #define LLVM_IR_STATEPOINT_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index 82362107e41e..b37b59288e3f 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -18,8 +18,8 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
index d3a59d8a060e..0ac13935c7ce 100644
--- a/include/llvm/IR/Use.h
+++ b/include/llvm/IR/Use.h
@@ -25,10 +25,10 @@
 #ifndef LLVM_IR_USE_H
 #define LLVM_IR_USE_H
 
+#include "llvm-c/Types.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm-c/Types.h"
 
 namespace llvm {
 
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index d669b1544070..ccd40e576584 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -14,11 +14,11 @@
 #ifndef LLVM_IR_VALUE_H
 #define LLVM_IR_VALUE_H
 
+#include "llvm-c/Types.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/Use.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Casting.h"
-#include "llvm-c/Types.h"
 #include <cassert>
 #include <iterator>
 #include <memory>
diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h
index 3772592757be..774e144b3ef0 100644
--- a/include/llvm/LTO/LTO.h
+++ b/include/llvm/LTO/LTO.h
@@ -366,8 +366,9 @@ private:
 /// each global symbol based on its internal resolution of that symbol.
 struct SymbolResolution {
   SymbolResolution()
-      : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0) {
-  }
+      : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0),
+        LinkerRedefined(0) {}
+
   /// The linker has chosen this definition of the symbol.
   unsigned Prevailing : 1;
 
@@ -377,6 +378,10 @@ struct SymbolResolution {
 
   /// The definition of this symbol is visible outside of the LTO unit.
   unsigned VisibleToRegularObj : 1;
+
+  /// This is the linker-redefined version of a symbol that appeared in a -wrap
+  /// or -defsym linker option.
+  unsigned LinkerRedefined : 1;
 };
 
 } // namespace lto
diff --git a/include/llvm/LinkAllIR.h b/include/llvm/LinkAllIR.h
index f078c73f979e..de1d305f8e77 100644
--- a/include/llvm/LinkAllIR.h
+++ b/include/llvm/LinkAllIR.h
@@ -16,13 +16,13 @@
 #ifndef LLVM_LINKALLIR_H
 #define LLVM_LINKALLIR_H
 
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Memory.h"
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 5c398b2ab567..c309ddbe2f02 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -16,8 +16,8 @@
 #define LLVM_LINKALLPASSES_H
 
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/AliasAnalysisEvaluator.h"
+#include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
@@ -38,6 +38,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/Support/Valgrind.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
@@ -48,7 +49,6 @@
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 #include "llvm/Transforms/Vectorize.h"
-#include "llvm/Support/Valgrind.h"
 #include <cstdlib>
 
 namespace {
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 869706c45483..234762f36dd4 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -51,12 +51,6 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
 
 } // end namespace LCOMM
 
-enum class DebugCompressionType {
-  DCT_None,    // no compression
-  DCT_Zlib,    // zlib style complession
-  DCT_ZlibGnu  // zlib-gnu style compression
-};
-
 /// This class is intended to be used as a base class for asm
 /// properties and features specific to the target.
 class MCAsmInfo {
@@ -366,7 +360,7 @@ protected:
   bool PreserveAsmComments;
 
   /// Compress DWARF debug sections. Defaults to no compression.
-  DebugCompressionType CompressDebugSections = DebugCompressionType::DCT_None;
+  DebugCompressionType CompressDebugSections = DebugCompressionType::None;
 
   /// True if the integrated assembler should interpret 'a >> b' constant
   /// expressions as logical rather than arithmetic.
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 185b892d9621..63f7057a7076 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -11,11 +11,11 @@
 #define LLVM_MC_MCASSEMBLER_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCFixup.h"
diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h
index 41521a6549b8..c3f1cecc97f4 100644
--- a/include/llvm/MC/MCCodeView.h
+++ b/include/llvm/MC/MCCodeView.h
@@ -14,10 +14,10 @@
 #ifndef LLVM_MC_MCCODEVIEW_H
 #define LLVM_MC_MCCODEVIEW_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCObjectStreamer.h"
 #include <map>
 #include <vector>
 
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 9bea19631303..2c60014adf23 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -17,12 +17,12 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index f22fc11f9b07..2efd37924e2e 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -11,8 +11,8 @@
 #define LLVM_MC_MCELFOBJECTWRITER_H
 
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 #include <vector>
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
index 0ca530c45102..0aca922e3cf5 100644
--- a/include/llvm/MC/MCFragment.h
+++ b/include/llvm/MC/MCFragment.h
@@ -11,10 +11,10 @@
 #define LLVM_MC_MCFRAGMENT_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ilist_node.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/SMLoc.h"
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index b93638f86408..2d2480a27223 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -12,11 +12,11 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
 #include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Support/MachO.h"
 #include <cstdint>
 #include <memory>
 #include <string>
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index 8b2a1261b220..4d634447987b 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -109,6 +109,9 @@ protected:
   MCSection *DwarfLineDWOSection;
   MCSection *DwarfLocDWOSection;
   MCSection *DwarfStrOffDWOSection;
+
+  /// The DWARF v5 string offset and address table sections.
+  MCSection *DwarfStrOffSection;
   MCSection *DwarfAddrSection;
 
   // These are for Fission DWP files.
@@ -260,6 +263,7 @@ public:
   MCSection *getDwarfLineDWOSection() const { return DwarfLineDWOSection; }
   MCSection *getDwarfLocDWOSection() const { return DwarfLocDWOSection; }
   MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; }
+  MCSection *getDwarfStrOffSection() const { return DwarfStrOffSection; }
   MCSection *getDwarfAddrSection() const { return DwarfAddrSection; }
   MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; }
   MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; }
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index 75d45f490bde..3a659f048ccf 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -11,9 +11,9 @@
 #define LLVM_MC_MCPARSER_MCASMPARSER_H
 
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 7bfffbcdb7c2..cc306d47250d 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_MC_MCSECTION_H
 #define LLVM_MC_MCSECTION_H
 
-#include "llvm/ADT/ilist.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ilist.h"
 #include "llvm/MC/MCFragment.h"
 #include "llvm/MC/SectionKind.h"
 #include <cassert>
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 3bc5408a4f75..89db09cbdbdc 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -15,8 +15,8 @@
 #define LLVM_MC_MCSECTIONMACHO_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCSection.h"
-#include "llvm/Support/MachO.h"
 
 namespace llvm {
 
diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h
index 4445be006eb0..7d661ccc5de7 100644
--- a/include/llvm/MC/MCSymbolWasm.h
+++ b/include/llvm/MC/MCSymbolWasm.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_MC_MCSYMBOLWASM_H
 #define LLVM_MC_MCSYMBOLWASM_H
 
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Wasm.h"
 
 namespace llvm {
 class MCSymbolWasm : public MCSymbol {
diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h
index ab027ab27a41..5509bb3bdc7c 100644
--- a/include/llvm/MC/MCTargetOptions.h
+++ b/include/llvm/MC/MCTargetOptions.h
@@ -23,6 +23,12 @@ enum class ExceptionHandling {
   WinEH,    /// Windows Exception Handling
 };
 
+enum class DebugCompressionType {
+  None, /// No compression
+  GNU,  /// zlib-gnu style compression
+  Z,    /// zlib style compression
+};
+
 class StringRef;
 
 class MCTargetOptions {
diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h
index a4dd382706d7..c250d3bf03fb 100644
--- a/include/llvm/MC/MCWasmObjectWriter.h
+++ b/include/llvm/MC/MCWasmObjectWriter.h
@@ -11,6 +11,7 @@
 #define LLVM_MC_MCWASMOBJECTWRITER_H
 
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/raw_ostream.h"
@@ -28,27 +29,6 @@ class MCSymbolWasm;
 class MCValue;
 class raw_pwrite_stream;
 
-// Information about a single relocation.
-struct WasmRelocationEntry {
-  uint64_t Offset;            // Where is the relocation.
-  const MCSymbolWasm *Symbol; // The symbol to relocate with.
-  int64_t Addend;             // A value to add to the symbol.
-  unsigned Type;              // The type of the relocation.
-  MCSectionWasm *FixupSection;// The section the relocation is targeting.
-
-  WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
-                      int64_t Addend, unsigned Type,
-                      MCSectionWasm *FixupSection)
-      : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
-        FixupSection(FixupSection) {}
-
-  void print(raw_ostream &Out) const {
-    Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend
-        << ", Type=" << Type << ", FixupSection=" << FixupSection;
-  }
-  void dump() const { print(errs()); }
-};
-
 class MCWasmObjectTargetWriter {
   const unsigned Is64Bit : 1;
 
@@ -56,17 +36,11 @@ protected:
   explicit MCWasmObjectTargetWriter(bool Is64Bit_);
 
 public:
-  virtual ~MCWasmObjectTargetWriter() {}
+  virtual ~MCWasmObjectTargetWriter();
 
   virtual unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
                                 const MCFixup &Fixup, bool IsPCRel) const = 0;
 
-  virtual bool needsRelocateWithSymbol(const MCSymbol &Sym,
-                                       unsigned Type) const;
-
-  virtual void sortRelocs(const MCAssembler &Asm,
-                          std::vector<WasmRelocationEntry> &Relocs);
-
   /// \name Accessors
   /// @{
   bool is64Bit() const { return Is64Bit; }
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 807508107c56..6c5fb9d5c92b 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_OBJECT_ARCHIVE_H
 #define LLVM_OBJECT_ARCHIVE_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/Error.h"
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index dafd1a43cb59..ae695a529597 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -15,13 +15,13 @@
 #define LLVM_OBJECT_COFF_H
 
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/DebugInfo/CodeView/CVDebugRecord.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -646,6 +646,13 @@ struct coff_resource_dir_entry {
   } Offset;
 };
 
+struct coff_resource_data_entry {
+  support::ulittle32_t DataRVA;
+  support::ulittle32_t DataSize;
+  support::ulittle32_t Codepage;
+  support::ulittle32_t Reserved;
+};
+
 struct coff_resource_dir_table {
   support::ulittle32_t Characteristics;
   support::ulittle32_t TimeDateStamp;
diff --git a/include/llvm/Object/COFFModuleDefinition.h b/include/llvm/Object/COFFModuleDefinition.h
index 0428283fdc88..a0e8eacdb7a3 100644
--- a/include/llvm/Object/COFFModuleDefinition.h
+++ b/include/llvm/Object/COFFModuleDefinition.h
@@ -20,8 +20,8 @@
 #ifndef LLVM_OBJECT_COFF_MODULE_DEFINITION_H
 #define LLVM_OBJECT_COFF_MODULE_DEFINITION_H
 
-#include "llvm/Object/COFFImportFile.h"
 #include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
 
 namespace llvm {
 namespace object {
diff --git a/include/llvm/Object/Decompressor.h b/include/llvm/Object/Decompressor.h
index 0f63f8b821b7..c8e888d285e4 100644
--- a/include/llvm/Object/Decompressor.h
+++ b/include/llvm/Object/Decompressor.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_OBJECT_DECOMPRESSOR_H
 #define LLVM_OBJECT_DECOMPRESSOR_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Object/ObjectFile.h"
 
 namespace llvm {
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index a4d431b6cbe7..670c0bbce3ac 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -17,9 +17,9 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Object/ELFTypes.h"
 #include "llvm/Object/Error.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include <cassert>
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index ef2abd8c52ce..2ba3b13f49da 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ELF.h"
@@ -29,7 +30,6 @@
 #include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h
index 99346fe1a882..808144694acb 100644
--- a/include/llvm/Object/ELFTypes.h
+++ b/include/llvm/Object/ELFTypes.h
@@ -12,8 +12,8 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Object/Error.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include <cassert>
diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h
index 0ea89011e883..3bce7813ee93 100644
--- a/include/llvm/Object/IRObjectFile.h
+++ b/include/llvm/Object/IRObjectFile.h
@@ -15,10 +15,12 @@
 #define LLVM_OBJECT_IROBJECTFILE_H
 
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/Object/IRSymtab.h"
 #include "llvm/Object/ModuleSymbolTable.h"
 #include "llvm/Object/SymbolicFile.h"
 
 namespace llvm {
+class BitcodeModule;
 class Mangler;
 class Module;
 class GlobalValue;
@@ -61,7 +63,20 @@ public:
   static Expected<std::unique_ptr<IRObjectFile>> create(MemoryBufferRef Object,
                                                         LLVMContext &Context);
 };
+
+/// The contents of a bitcode file and its irsymtab. Any underlying data
+/// for the irsymtab are owned by Symtab and Strtab.
+struct IRSymtabFile {
+  std::vector<BitcodeModule> Mods;
+  SmallVector<char, 0> Symtab, Strtab;
+  irsymtab::Reader TheReader;
+};
+
+/// Reads a bitcode file, creating its irsymtab if necessary.
+Expected<IRSymtabFile> readIRSymtab(MemoryBufferRef MBRef);
+
 }
+
 }
 
 #endif
diff --git a/include/llvm/Object/IRSymtab.h b/include/llvm/Object/IRSymtab.h
index b425543bf637..5b832141a865 100644
--- a/include/llvm/Object/IRSymtab.h
+++ b/include/llvm/Object/IRSymtab.h
@@ -25,8 +25,8 @@
 #define LLVM_OBJECT_IRSYMTAB_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Object/SymbolicFile.h"
 #include "llvm/Support/Endian.h"
@@ -36,6 +36,9 @@
 #include <vector>
 
 namespace llvm {
+
+struct BitcodeFileContents;
+
 namespace irsymtab {
 
 namespace storage {
@@ -314,6 +317,16 @@ inline Reader::symbol_range Reader::module_symbols(unsigned I) const {
           SymbolRef(MEnd, MEnd, nullptr, this)};
 }
 
+/// The contents of the irsymtab in a bitcode file. Any underlying data for the
+/// irsymtab are owned by Symtab and Strtab.
+struct FileContents {
+  SmallVector<char, 0> Symtab, Strtab;
+  Reader TheReader;
+};
+
+/// Reads the contents of a bitcode file, creating its irsymtab if necessary.
+Expected<FileContents> readBitcode(const BitcodeFileContents &BFC);
+
 } // end namespace irsymtab
 } // end namespace llvm
 
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index a4356d5977b2..3fc726f4ccb8 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -16,19 +16,19 @@
 #define LLVM_OBJECT_MACHO_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/SymbolicFile.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
@@ -550,6 +550,8 @@ public:
 
   bool isRelocatableObject() const override;
 
+  StringRef mapDebugSectionName(StringRef Name) const override;
+
   bool hasPageZeroSegment() const { return HasPageZeroSegment; }
 
   static bool classof(const Binary *v) {
diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h
index a14c4ca01223..8a6f0fc56971 100644
--- a/include/llvm/Object/MachOUniversal.h
+++ b/include/llvm/Object/MachOUniversal.h
@@ -16,10 +16,10 @@
 
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/MachO.h"
-#include "llvm/Support/MachO.h"
 
 namespace llvm {
 class StringRef;
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index ea6a9049bc1b..6b5b9d95fcf3 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -14,8 +14,9 @@
 #ifndef LLVM_OBJECT_OBJECTFILE_H
 #define LLVM_OBJECT_OBJECTFILE_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
@@ -292,6 +293,9 @@ public:
       return std::error_code();
     }
 
+  /// Maps a debug section name to a standard DWARF section name.
+  virtual StringRef mapDebugSectionName(StringRef Name) const { return Name; }
+
   /// True if this is a relocatable object (.o/.obj).
   virtual bool isRelocatableObject() const = 0;
 
@@ -303,10 +307,10 @@ public:
   createObjectFile(StringRef ObjectPath);
 
   static Expected<std::unique_ptr<ObjectFile>>
-  createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type);
+  createObjectFile(MemoryBufferRef Object, llvm::file_magic Type);
   static Expected<std::unique_ptr<ObjectFile>>
   createObjectFile(MemoryBufferRef Object) {
-    return createObjectFile(Object, sys::fs::file_magic::unknown);
+    return createObjectFile(Object, llvm::file_magic::unknown);
   }
 
   static inline bool classof(const Binary *v) {
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
index 348179860f3e..c358d3996435 100644
--- a/include/llvm/Object/RelocVisitor.h
+++ b/include/llvm/Object/RelocVisitor.h
@@ -17,15 +17,15 @@
 #define LLVM_OBJECT_RELOCVISITOR_H
 
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/MachO.h"
 #include <cstdint>
 #include <system_error>
 
diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h
index f4be4bfdb1a3..97eeba6611a2 100644
--- a/include/llvm/Object/SymbolicFile.h
+++ b/include/llvm/Object/SymbolicFile.h
@@ -14,8 +14,9 @@
 #ifndef LLVM_OBJECT_SYMBOLICFILE_H
 #define LLVM_OBJECT_SYMBOLICFILE_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
@@ -162,12 +163,12 @@ public:
 
   // construction aux.
   static Expected<std::unique_ptr<SymbolicFile>>
-  createSymbolicFile(MemoryBufferRef Object, sys::fs::file_magic Type,
+  createSymbolicFile(MemoryBufferRef Object, llvm::file_magic Type,
                      LLVMContext *Context);
 
   static Expected<std::unique_ptr<SymbolicFile>>
   createSymbolicFile(MemoryBufferRef Object) {
-    return createSymbolicFile(Object, sys::fs::file_magic::unknown, nullptr);
+    return createSymbolicFile(Object, llvm::file_magic::unknown, nullptr);
   }
   static Expected<OwningBinary<SymbolicFile>>
   createSymbolicFile(StringRef ObjectPath);
diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h
index de54a4928cce..10edc461b9e9 100644
--- a/include/llvm/Object/Wasm.h
+++ b/include/llvm/Object/Wasm.h
@@ -19,11 +19,11 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Wasm.h"
 #include <cstddef>
 #include <cstdint>
 #include <vector>
diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h
index 2484f551aee0..c5189329d3ec 100644
--- a/include/llvm/Object/WindowsResource.h
+++ b/include/llvm/Object/WindowsResource.h
@@ -31,11 +31,11 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
@@ -44,10 +44,15 @@
 #include <map>
 
 namespace llvm {
+
+class FileOutputBuffer;
+
 namespace object {
 
 class WindowsResource;
 
+enum class Machine { UNKNOWN, ARM, X64, X86 };
+
 class ResourceEntryRef {
 public:
   Error moveNext(bool &End);
@@ -58,6 +63,10 @@ public:
   ArrayRef<UTF16> getNameString() const { return Name; }
   uint16_t getNameID() const { return NameID; }
   uint16_t getLanguage() const { return Suffix->Language; }
+  uint16_t getMajorVersion() const { return Suffix->Version >> 16; }
+  uint16_t getMinorVersion() const { return Suffix->Version; }
+  uint32_t getCharacteristics() const { return Suffix->Characteristics; }
+  ArrayRef<uint8_t> getData() const { return Data; }
 
 private:
   friend class WindowsResource;
@@ -106,34 +115,77 @@ private:
 
 class WindowsResourceParser {
 public:
+  class TreeNode;
   WindowsResourceParser();
-
   Error parse(WindowsResource *WR);
-
   void printTree() const;
+  const TreeNode &getTree() const { return Root; }
+  const ArrayRef<std::vector<uint8_t>> getData() const { return Data; }
+  const ArrayRef<std::vector<UTF16>> getStringTable() const {
+    return StringTable;
+  }
 
-private:
   class TreeNode {
   public:
-    TreeNode() = default;
-    explicit TreeNode(ArrayRef<UTF16> Ref);
-    void addEntry(const ResourceEntryRef &Entry);
+    template <typename T>
+    using Children = std::map<T, std::unique_ptr<TreeNode>>;
+
     void print(ScopedPrinter &Writer, StringRef Name) const;
+    uint32_t getTreeSize() const;
+    uint32_t getStringIndex() const { return StringIndex; }
+    uint32_t getDataIndex() const { return DataIndex; }
+    uint16_t getMajorVersion() const { return MajorVersion; }
+    uint16_t getMinorVersion() const { return MinorVersion; }
+    uint32_t getCharacteristics() const { return Characteristics; }
+    bool checkIsDataNode() const { return IsDataNode; }
+    const Children<uint32_t> &getIDChildren() const { return IDChildren; }
+    const Children<std::string> &getStringChildren() const {
+      return StringChildren;
+    }
 
   private:
+    friend class WindowsResourceParser;
+
+    static uint32_t StringCount;
+    static uint32_t DataCount;
+
+    static std::unique_ptr<TreeNode> createStringNode();
+    static std::unique_ptr<TreeNode> createIDNode();
+    static std::unique_ptr<TreeNode> createDataNode(uint16_t MajorVersion,
+                                                    uint16_t MinorVersion,
+                                                    uint32_t Characteristics);
+
+    explicit TreeNode(bool IsStringNode);
+    TreeNode(uint16_t MajorVersion, uint16_t MinorVersion,
+             uint32_t Characteristics);
+
+    void addEntry(const ResourceEntryRef &Entry);
     TreeNode &addTypeNode(const ResourceEntryRef &Entry);
     TreeNode &addNameNode(const ResourceEntryRef &Entry);
     TreeNode &addLanguageNode(const ResourceEntryRef &Entry);
-    TreeNode &addChild(uint32_t ID);
+    TreeNode &addChild(uint32_t ID, bool IsDataNode = false,
+                       uint16_t MajorVersion = 0, uint16_t MinorVersion = 0,
+                       uint32_t Characteristics = 0);
     TreeNode &addChild(ArrayRef<UTF16> NameRef);
-    std::vector<UTF16> Name;
-    std::map<uint32_t, std::unique_ptr<TreeNode>> IDChildren;
-    std::map<std::string, std::unique_ptr<TreeNode>> StringChildren;
+    bool IsDataNode = false;
+    uint32_t StringIndex;
+    uint32_t DataIndex;
+    Children<uint32_t> IDChildren;
+    Children<std::string> StringChildren;
+    uint16_t MajorVersion = 0;
+    uint16_t MinorVersion = 0;
+    uint32_t Characteristics = 0;
   };
 
+private:
   TreeNode Root;
+  std::vector<std::vector<uint8_t>> Data;
+  std::vector<std::vector<UTF16>> StringTable;
 };
 
+Error writeWindowsResourceCOFF(StringRef OutputFile, Machine MachineType,
+                               const WindowsResourceParser &Parser);
+
 } // namespace object
 } // namespace llvm
 
diff --git a/include/llvm/ObjectYAML/COFFYAML.h b/include/llvm/ObjectYAML/COFFYAML.h
index 65ad1dde67f5..1b5f7b00239a 100644
--- a/include/llvm/ObjectYAML/COFFYAML.h
+++ b/include/llvm/ObjectYAML/COFFYAML.h
@@ -15,8 +15,8 @@
 #define LLVM_OBJECTYAML_COFFYAML_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/ObjectYAML/YAML.h"
-#include "llvm/Support/COFF.h"
 
 namespace llvm {
 
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
index a6d4d404415f..faa3ed8a6c52 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
@@ -26,6 +26,8 @@ namespace codeview {
 class DebugStringTableSubsection;
 class DebugStringTableSubsectionRef;
 class DebugChecksumsSubsectionRef;
+class DebugStringTableSubsection;
+class DebugChecksumsSubsection;
 }
 namespace CodeViewYAML {
 
@@ -33,6 +35,23 @@ namespace detail {
 struct YAMLSubsectionBase;
 }
 
+struct YAMLFrameData {
+  uint32_t RvaStart;
+  uint32_t CodeSize;
+  uint32_t LocalSize;
+  uint32_t ParamsSize;
+  uint32_t MaxStackSize;
+  StringRef FrameFunc;
+  uint32_t PrologSize;
+  uint32_t SavedRegsSize;
+  uint32_t Flags;
+};
+
+struct YAMLCrossModuleImport {
+  StringRef ModuleName;
+  std::vector<uint32_t> ImportIds;
+};
+
 struct SourceLineEntry {
   uint32_t Offset;
   uint32_t LineStart;
@@ -92,8 +111,17 @@ struct YAMLDebugSubsection {
 };
 
 Expected<std::vector<std::unique_ptr<codeview::DebugSubsection>>>
-convertSubsectionList(ArrayRef<YAMLDebugSubsection> Subsections,
-                      codeview::DebugStringTableSubsection &Strings);
+toCodeViewSubsectionList(BumpPtrAllocator &Allocator,
+                         ArrayRef<YAMLDebugSubsection> Subsections,
+                         codeview::DebugStringTableSubsection &Strings);
+Expected<std::vector<std::unique_ptr<codeview::DebugSubsection>>>
+toCodeViewSubsectionList(
+    BumpPtrAllocator &Allocator, ArrayRef<YAMLDebugSubsection> Subsections,
+    std::unique_ptr<codeview::DebugStringTableSubsection> &TakeStrings,
+    codeview::DebugStringTableSubsection *StringsRef);
+
+std::unique_ptr<codeview::DebugStringTableSubsection>
+findStringTable(ArrayRef<YAMLDebugSubsection> Sections);
 
 } // namespace CodeViewYAML
 } // namespace llvm
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
index a57ada34a4fa..91b75aabe7a5 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
@@ -18,8 +18,12 @@
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Allocator.h"
 
 namespace llvm {
+namespace codeview {
+class TypeTableBuilder;
+}
 namespace CodeViewYAML {
 namespace detail {
 struct LeafRecordBase;
@@ -34,6 +38,7 @@ struct LeafRecord {
   std::shared_ptr<detail::LeafRecordBase> Leaf;
 
   codeview::CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const;
+  codeview::CVType toCodeViewRecord(codeview::TypeTableBuilder &TS) const;
   static Expected<LeafRecord> fromCodeViewRecord(codeview::CVType Type);
 };
 } // namespace CodeViewYAML
diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h
index 3f39cfc7bb3d..75e9112e121a 100644
--- a/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/include/llvm/ObjectYAML/DWARFYAML.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_OBJECTYAML_DWARFYAML_H
 #define LLVM_OBJECTYAML_DWARFYAML_H
 
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/ObjectYAML/YAML.h"
-#include "llvm/Support/Dwarf.h"
 
 namespace llvm {
 namespace DWARFYAML {
@@ -241,7 +241,7 @@ template <> struct MappingTraits<DWARFYAML::InitialLength> {
 
 template <> struct ScalarEnumerationTraits<dwarf::Tag> {
   static void enumeration(IO &io, dwarf::Tag &value) {
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
     io.enumFallback<Hex16>(value);
   }
 };
@@ -251,7 +251,7 @@ template <> struct ScalarEnumerationTraits<dwarf::Tag> {
 
 template <> struct ScalarEnumerationTraits<dwarf::LineNumberOps> {
   static void enumeration(IO &io, dwarf::LineNumberOps &value) {
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
     io.enumFallback<Hex8>(value);
   }
 };
@@ -261,7 +261,7 @@ template <> struct ScalarEnumerationTraits<dwarf::LineNumberOps> {
 
 template <> struct ScalarEnumerationTraits<dwarf::LineNumberExtendedOps> {
   static void enumeration(IO &io, dwarf::LineNumberExtendedOps &value) {
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
     io.enumFallback<Hex16>(value);
   }
 };
@@ -271,7 +271,7 @@ template <> struct ScalarEnumerationTraits<dwarf::LineNumberExtendedOps> {
 
 template <> struct ScalarEnumerationTraits<dwarf::Attribute> {
   static void enumeration(IO &io, dwarf::Attribute &value) {
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
     io.enumFallback<Hex16>(value);
   }
 };
@@ -281,7 +281,7 @@ template <> struct ScalarEnumerationTraits<dwarf::Attribute> {
 
 template <> struct ScalarEnumerationTraits<dwarf::Form> {
   static void enumeration(IO &io, dwarf::Form &value) {
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
     io.enumFallback<Hex16>(value);
   }
 };
@@ -291,7 +291,7 @@ template <> struct ScalarEnumerationTraits<dwarf::Form> {
 
 template <> struct ScalarEnumerationTraits<dwarf::UnitType> {
   static void enumeration(IO &io, dwarf::UnitType &value) {
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
     io.enumFallback<Hex8>(value);
   }
 };
diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h
index 81a4ec28c94f..9d62ec27ad31 100644
--- a/include/llvm/ObjectYAML/ELFYAML.h
+++ b/include/llvm/ObjectYAML/ELFYAML.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_OBJECTYAML_ELFYAML_H
 #define LLVM_OBJECTYAML_ELFYAML_H
 
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/ObjectYAML/YAML.h"
-#include "llvm/Support/ELF.h"
 
 namespace llvm {
 namespace ELFYAML {
diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h
index ae858c8f4aaf..59aca9a1ddf2 100644
--- a/include/llvm/ObjectYAML/MachOYAML.h
+++ b/include/llvm/ObjectYAML/MachOYAML.h
@@ -16,9 +16,9 @@
 #ifndef LLVM_OBJECTYAML_MACHOYAML_H
 #define LLVM_OBJECTYAML_MACHOYAML_H
 
-#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/ObjectYAML/DWARFYAML.h"
-#include "llvm/Support/MachO.h"
+#include "llvm/ObjectYAML/YAML.h"
 
 namespace llvm {
 namespace MachOYAML {
@@ -209,7 +209,7 @@ template <> struct MappingTraits<MachO::build_tool_version> {
 
 template <> struct ScalarEnumerationTraits<MachO::LoadCommandType> {
   static void enumeration(IO &io, MachO::LoadCommandType &value) {
-#include "llvm/Support/MachO.def"
+#include "llvm/BinaryFormat/MachO.def"
     io.enumFallback<Hex32>(value);
   }
 };
@@ -278,7 +278,7 @@ template <> struct ScalarTraits<uuid_t> {
     static void mapping(IO &IO, MachO::LCStruct &LoadCommand);                 \
   };
 
-#include "llvm/Support/MachO.def"
+#include "llvm/BinaryFormat/MachO.def"
 
 // Extra structures used by load commands
 template <> struct MappingTraits<MachO::dylib> {
diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h
index 7b70c9537827..447dbd7a603d 100644
--- a/include/llvm/ObjectYAML/WasmYAML.h
+++ b/include/llvm/ObjectYAML/WasmYAML.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_OBJECTYAML_WASMYAML_H
 #define LLVM_OBJECTYAML_WASMYAML_H
 
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/ObjectYAML/YAML.h"
-#include "llvm/Support/Wasm.h"
 
 namespace llvm {
 namespace WasmYAML {
diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h
index 4ed28d7a852b..6a92dd01e911 100644
--- a/include/llvm/Option/ArgList.h
+++ b/include/llvm/Option/ArgList.h
@@ -11,8 +11,8 @@
 #define LLVM_OPTION_ARGLIST_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Option/Arg.h"
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index e9c8ca3072c7..2dd6935cf01c 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -384,7 +384,7 @@ extern bool isFunctionInPrintList(StringRef FunctionName);
 // Include support files that contain important APIs commonly used by Passes,
 // but that we want to separate out to make it easier to read the header files.
 //
-#include "llvm/PassSupport.h"
 #include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassSupport.h"
 
 #endif
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h
index b9a9f5377698..b2f73fda2bae 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -18,11 +18,11 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/include/llvm/Support/AMDGPUCodeObjectMetadata.h
similarity index 98%
rename from lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
rename to include/llvm/Support/AMDGPUCodeObjectMetadata.h
index 816e8c744b27..d274c5ee9184 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
+++ b/include/llvm/Support/AMDGPUCodeObjectMetadata.h
@@ -14,8 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+#ifndef LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H
+#define LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H
 
 #include <cstdint>
 #include <string>
@@ -419,4 +419,4 @@ struct Metadata final {
 } // end namespace AMDGPU
 } // end namespace llvm
 
-#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+#endif // LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H
diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h
index 77c99ffff919..65ec15f6d9e0 100644
--- a/include/llvm/Support/BinaryStreamArray.h
+++ b/include/llvm/Support/BinaryStreamArray.h
@@ -42,36 +42,114 @@ namespace llvm {
 /// having to specify a second template argument to VarStreamArray (documented
 /// below).
 template <typename T> struct VarStreamArrayExtractor {
-  struct ContextType {};
-
   // Method intentionally deleted.  You must provide an explicit specialization
-  // with one of the following two methods implemented.
-  static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item) = delete;
-
-  static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item,
-                       const ContextType &Ctx) = delete;
+  // with the following method implemented.
+  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
+                   T &Item) const = delete;
 };
 
-template <typename ArrayType, typename Value, typename Extractor,
-          typename WrappedCtx>
-class VarStreamArrayIterator
-    : public iterator_facade_base<
-          VarStreamArrayIterator<ArrayType, Value, Extractor, WrappedCtx>,
-          std::forward_iterator_tag, Value> {
-  typedef VarStreamArrayIterator<ArrayType, Value, Extractor, WrappedCtx>
-      IterType;
+/// VarStreamArray represents an array of variable length records backed by a
+/// stream.  This could be a contiguous sequence of bytes in memory, it could
+/// be a file on disk, or it could be a PDB stream where bytes are stored as
+/// discontiguous blocks in a file.  Usually it is desirable to treat arrays
+/// as contiguous blocks of memory, but doing so with large PDB files, for
+/// example, could mean allocating huge amounts of memory just to allow
+/// re-ordering of stream data to be contiguous before iterating over it.  By
+/// abstracting this out, we need not duplicate this memory, and we can
+/// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
+/// lazily on iteration, so there is no upfront cost associated with building
+/// or copying a VarStreamArray, no matter how large it may be.
+///
+/// You create a VarStreamArray by specifying a ValueType and an Extractor type.
+/// If you do not specify an Extractor type, you are expected to specialize
+/// VarStreamArrayExtractor<T> for your ValueType.
+///
+/// By default an Extractor is default constructed in the class, but in some
+/// cases you might find it useful for an Extractor to maintain state across
+/// extractions.  In this case you can provide your own Extractor through a
+/// secondary constructor.  The following examples show various ways of
+/// creating a VarStreamArray.
+///
+///       // Will use VarStreamArrayExtractor<MyType> as the extractor.
+///       VarStreamArray<MyType> MyTypeArray;
+///
+///       // Will use a default-constructed MyExtractor as the extractor.
+///       VarStreamArray<MyType, MyExtractor> MyTypeArray2;
+///
+///       // Will use the specific instance of MyExtractor provided.
+///       // MyExtractor need not be default-constructible in this case.
+///       MyExtractor E(SomeContext);
+///       VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
+///
+
+template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
+
+template <typename ValueType,
+          typename Extractor = VarStreamArrayExtractor<ValueType>>
+class VarStreamArray {
+  friend class VarStreamArrayIterator<ValueType, Extractor>;
 
 public:
-  VarStreamArrayIterator() = default;
-  VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx,
-                         BinaryStreamRef Stream, bool *HadError = nullptr,
-                         uint32_t Offset = 0)
-      : IterRef(Stream), Ctx(&Ctx), Array(&Array), AbsOffset(Offset),
-        HadError(HadError) {
+  typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
+
+  VarStreamArray() = default;
+
+  explicit VarStreamArray(const Extractor &E) : E(E) {}
+
+  explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {}
+
+  VarStreamArray(BinaryStreamRef Stream, const Extractor &E)
+      : Stream(Stream), E(E) {}
+
+  Iterator begin(bool *HadError = nullptr) const {
+    return Iterator(*this, E, HadError);
+  }
+
+  bool valid() const { return Stream.valid(); }
+
+  Iterator end() const { return Iterator(E); }
+
+  bool empty() const { return Stream.getLength() == 0; }
+
+  /// \brief given an offset into the array's underlying stream, return an
+  /// iterator to the record at that offset.  This is considered unsafe
+  /// since the behavior is undefined if \p Offset does not refer to the
+  /// beginning of a valid record.
+  Iterator at(uint32_t Offset) const {
+    return Iterator(*this, E, Offset, nullptr);
+  }
+
+  const Extractor &getExtractor() const { return E; }
+  Extractor &getExtractor() { return E; }
+
+  BinaryStreamRef getUnderlyingStream() const { return Stream; }
+  void setUnderlyingStream(BinaryStreamRef S) { Stream = S; }
+
+private:
+  BinaryStreamRef Stream;
+  Extractor E;
+};
+
+template <typename ValueType, typename Extractor>
+class VarStreamArrayIterator
+    : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
+                                  std::forward_iterator_tag, ValueType> {
+  typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
+  typedef VarStreamArray<ValueType, Extractor> ArrayType;
+
+public:
+  VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
+                         bool *HadError)
+      : VarStreamArrayIterator(Array, E, 0, HadError) {}
+
+  VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
+                         uint32_t Offset, bool *HadError)
+      : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
+        Array(&Array), AbsOffset(Offset), HadError(HadError) {
     if (IterRef.getLength() == 0)
       moveToEnd();
     else {
-      auto EC = Ctx.template invoke<Extractor>(IterRef, ThisLen, ThisValue);
+      auto EC = Extract(IterRef, ThisLen, ThisValue);
       if (EC) {
         consumeError(std::move(EC));
         markError();
@@ -79,13 +157,8 @@ public:
     }
   }
 
-  VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx,
-                         bool *HadError = nullptr)
-      : VarStreamArrayIterator(Array, Ctx, Array.Stream, HadError) {}
-
-  VarStreamArrayIterator(const WrappedCtx &Ctx) : Ctx(&Ctx) {}
-  VarStreamArrayIterator(const VarStreamArrayIterator &Other) = default;
-
+  VarStreamArrayIterator() = default;
+  explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
   ~VarStreamArrayIterator() = default;
 
   bool operator==(const IterType &R) const {
@@ -103,12 +176,12 @@ public:
     return false;
   }
 
-  const Value &operator*() const {
+  const ValueType &operator*() const {
     assert(Array && !HasError);
     return ThisValue;
   }
 
-  Value &operator*() {
+  ValueType &operator*() {
     assert(Array && !HasError);
     return ThisValue;
   }
@@ -125,7 +198,7 @@ public:
         moveToEnd();
       } else {
         // There is some data after the current record.
-        auto EC = Ctx->template invoke<Extractor>(IterRef, ThisLen, ThisValue);
+        auto EC = Extract(IterRef, ThisLen, ThisValue);
         if (EC) {
           consumeError(std::move(EC));
           markError();
@@ -153,9 +226,9 @@ private:
       *HadError = true;
   }
 
-  Value ThisValue;
+  ValueType ThisValue;
   BinaryStreamRef IterRef;
-  const WrappedCtx *Ctx{nullptr};
+  Extractor Extract;
   const ArrayType *Array{nullptr};
   uint32_t ThisLen{0};
   uint32_t AbsOffset{0};
@@ -163,127 +236,6 @@ private:
   bool *HadError{nullptr};
 };
 
-template <typename T, typename Context> struct ContextWrapper {
-  ContextWrapper() = default;
-
-  explicit ContextWrapper(Context &&Ctx) : Ctx(Ctx) {}
-
-  template <typename Extractor>
-  Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const {
-    return Extractor::extract(Stream, Len, Item, Ctx);
-  }
-
-  Context Ctx;
-};
-
-template <typename T> struct ContextWrapper<T, void> {
-  ContextWrapper() = default;
-
-  template <typename Extractor>
-  Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const {
-    return Extractor::extract(Stream, Len, Item);
-  }
-};
-
-/// VarStreamArray represents an array of variable length records backed by a
-/// stream.  This could be a contiguous sequence of bytes in memory, it could
-/// be a file on disk, or it could be a PDB stream where bytes are stored as
-/// discontiguous blocks in a file.  Usually it is desirable to treat arrays
-/// as contiguous blocks of memory, but doing so with large PDB files, for
-/// example, could mean allocating huge amounts of memory just to allow
-/// re-ordering of stream data to be contiguous before iterating over it.  By
-/// abstracting this out, we need not duplicate this memory, and we can
-/// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
-/// lazily on iteration, so there is no upfront cost associated with building
-/// or copying a VarStreamArray, no matter how large it may be.
-///
-/// You create a VarStreamArray by specifying a ValueType and an Extractor type.
-/// If you do not specify an Extractor type, you are expected to specialize
-/// VarStreamArrayExtractor<T> for your ValueType.
-///
-/// The default extractor type is stateless, but by specializing
-/// VarStreamArrayExtractor or defining your own custom extractor type and
-/// adding the appropriate ContextType typedef to the class, you can pass a
-/// context field during construction of the VarStreamArray that will be
-/// passed to each call to extract.
-///
-template <typename Value, typename Extractor, typename WrappedCtx>
-class VarStreamArrayBase {
-  typedef VarStreamArrayBase<Value, Extractor, WrappedCtx> MyType;
-
-public:
-  typedef VarStreamArrayIterator<MyType, Value, Extractor, WrappedCtx> Iterator;
-  friend Iterator;
-
-  VarStreamArrayBase() = default;
-
-  VarStreamArrayBase(BinaryStreamRef Stream, const WrappedCtx &Ctx)
-      : Stream(Stream), Ctx(Ctx) {}
-
-  VarStreamArrayBase(const MyType &Other)
-      : Stream(Other.Stream), Ctx(Other.Ctx) {}
-
-  Iterator begin(bool *HadError = nullptr) const {
-    if (empty())
-      return end();
-
-    return Iterator(*this, Ctx, Stream, HadError);
-  }
-
-  bool valid() const { return Stream.valid(); }
-
-  Iterator end() const { return Iterator(Ctx); }
-
-  bool empty() const { return Stream.getLength() == 0; }
-
-  /// \brief given an offset into the array's underlying stream, return an
-  /// iterator to the record at that offset.  This is considered unsafe
-  /// since the behavior is undefined if \p Offset does not refer to the
-  /// beginning of a valid record.
-  Iterator at(uint32_t Offset) const {
-    return Iterator(*this, Ctx, Stream.drop_front(Offset), nullptr, Offset);
-  }
-
-  BinaryStreamRef getUnderlyingStream() const { return Stream; }
-
-private:
-  BinaryStreamRef Stream;
-  WrappedCtx Ctx;
-};
-
-template <typename Value, typename Extractor, typename Context>
-class VarStreamArrayImpl
-    : public VarStreamArrayBase<Value, Extractor,
-                                ContextWrapper<Value, Context>> {
-  typedef ContextWrapper<Value, Context> WrappedContext;
-  typedef VarStreamArrayImpl<Value, Extractor, Context> MyType;
-  typedef VarStreamArrayBase<Value, Extractor, WrappedContext> BaseType;
-
-public:
-  typedef Context ContextType;
-
-  VarStreamArrayImpl() = default;
-  VarStreamArrayImpl(BinaryStreamRef Stream, Context &&Ctx)
-      : BaseType(Stream, WrappedContext(std::forward<Context>(Ctx))) {}
-};
-
-template <typename Value, typename Extractor>
-class VarStreamArrayImpl<Value, Extractor, void>
-    : public VarStreamArrayBase<Value, Extractor, ContextWrapper<Value, void>> {
-  typedef ContextWrapper<Value, void> WrappedContext;
-  typedef VarStreamArrayImpl<Value, Extractor, void> MyType;
-  typedef VarStreamArrayBase<Value, Extractor, WrappedContext> BaseType;
-
-public:
-  VarStreamArrayImpl() = default;
-  VarStreamArrayImpl(BinaryStreamRef Stream)
-      : BaseType(Stream, WrappedContext()) {}
-};
-
-template <typename Value, typename Extractor = VarStreamArrayExtractor<Value>>
-using VarStreamArray =
-    VarStreamArrayImpl<Value, Extractor, typename Extractor::ContextType>;
-
 template <typename T> class FixedStreamArrayIterator;
 
 /// FixedStreamArray is similar to VarStreamArray, except with each record
diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h
index 29e8a2ab08aa..738c042add3e 100644
--- a/include/llvm/Support/BinaryStreamReader.h
+++ b/include/llvm/Support/BinaryStreamReader.h
@@ -198,25 +198,7 @@ public:
     BinaryStreamRef S;
     if (auto EC = readStreamRef(S, Size))
       return EC;
-    Array = VarStreamArray<T, U>(S);
-    return Error::success();
-  }
-
-  /// Read a VarStreamArray of size \p Size bytes and store the result into
-  /// \p Array.  Updates the stream's offset to point after the newly read
-  /// array.  Never causes a copy (although iterating the elements of the
-  /// VarStreamArray may, depending upon the implementation of the underlying
-  /// stream).
-  ///
-  /// \returns a success error code if the data was successfully read, otherwise
-  /// returns an appropriate error code.
-  template <typename T, typename U, typename ContextType>
-  Error readArray(VarStreamArray<T, U> &Array, uint32_t Size,
-                  ContextType &&Context) {
-    BinaryStreamRef S;
-    if (auto EC = readStreamRef(S, Size))
-      return EC;
-    Array = VarStreamArray<T, U>(S, std::move(Context));
+    Array.setUnderlyingStream(S);
     return Error::success();
   }
 
diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h
index d4633aa7d3c6..f60f99d376ad 100644
--- a/include/llvm/Support/CBindingWrapping.h
+++ b/include/llvm/Support/CBindingWrapping.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_SUPPORT_CBINDINGWRAPPING_H
 #define LLVM_SUPPORT_CBINDINGWRAPPING_H
 
-#include "llvm/Support/Casting.h"
 #include "llvm-c/Types.h"
+#include "llvm/Support/Casting.h"
 
 #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)     \
   inline ty *unwrap(ref P) {                            \
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
deleted file mode 100644
index bc2098e2b5cf..000000000000
--- a/include/llvm/Support/COFF.h
+++ /dev/null
@@ -1,724 +0,0 @@
-//===-- llvm/Support/COFF.h -------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains an definitions used in Windows COFF Files.
-//
-// Structures and enums defined within this file where created using
-// information from Microsoft's publicly available PE/COFF format document:
-//
-// Microsoft Portable Executable and Common Object File Format Specification
-// Revision 8.1 - February 15, 2008
-//
-// As of 5/2/2010, hosted by Microsoft at:
-// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_COFF_H
-#define LLVM_SUPPORT_COFF_H
-
-#include "llvm/Support/DataTypes.h"
-#include <cassert>
-#include <cstring>
-
-namespace llvm {
-namespace COFF {
-
-  // The maximum number of sections that a COFF object can have (inclusive).
-  const int32_t MaxNumberOfSections16 = 65279;
-
-  // The PE signature bytes that follows the DOS stub header.
-  static const char PEMagic[] = { 'P', 'E', '\0', '\0' };
-
-  static const char BigObjMagic[] = {
-      '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b',
-      '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8',
-  };
-
-  static const char ClGlObjMagic[] = {
-      '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d',
-      '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2',
-  };
-
-  // Sizes in bytes of various things in the COFF format.
-  enum {
-    Header16Size   = 20,
-    Header32Size   = 56,
-    NameSize       = 8,
-    Symbol16Size   = 18,
-    Symbol32Size   = 20,
-    SectionSize    = 40,
-    RelocationSize = 10
-  };
-
-  struct header {
-    uint16_t Machine;
-    int32_t  NumberOfSections;
-    uint32_t TimeDateStamp;
-    uint32_t PointerToSymbolTable;
-    uint32_t NumberOfSymbols;
-    uint16_t SizeOfOptionalHeader;
-    uint16_t Characteristics;
-  };
-
-  struct BigObjHeader {
-    enum : uint16_t { MinBigObjectVersion = 2 };
-
-    uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0).
-    uint16_t Sig2; ///< Must be 0xFFFF.
-    uint16_t Version;
-    uint16_t Machine;
-    uint32_t TimeDateStamp;
-    uint8_t  UUID[16];
-    uint32_t unused1;
-    uint32_t unused2;
-    uint32_t unused3;
-    uint32_t unused4;
-    uint32_t NumberOfSections;
-    uint32_t PointerToSymbolTable;
-    uint32_t NumberOfSymbols;
-  };
-
-  enum MachineTypes {
-    MT_Invalid = 0xffff,
-
-    IMAGE_FILE_MACHINE_UNKNOWN   = 0x0,
-    IMAGE_FILE_MACHINE_AM33      = 0x13,
-    IMAGE_FILE_MACHINE_AMD64     = 0x8664,
-    IMAGE_FILE_MACHINE_ARM       = 0x1C0,
-    IMAGE_FILE_MACHINE_ARMNT     = 0x1C4,
-    IMAGE_FILE_MACHINE_ARM64     = 0xAA64,
-    IMAGE_FILE_MACHINE_EBC       = 0xEBC,
-    IMAGE_FILE_MACHINE_I386      = 0x14C,
-    IMAGE_FILE_MACHINE_IA64      = 0x200,
-    IMAGE_FILE_MACHINE_M32R      = 0x9041,
-    IMAGE_FILE_MACHINE_MIPS16    = 0x266,
-    IMAGE_FILE_MACHINE_MIPSFPU   = 0x366,
-    IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466,
-    IMAGE_FILE_MACHINE_POWERPC   = 0x1F0,
-    IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1,
-    IMAGE_FILE_MACHINE_R4000     = 0x166,
-    IMAGE_FILE_MACHINE_SH3       = 0x1A2,
-    IMAGE_FILE_MACHINE_SH3DSP    = 0x1A3,
-    IMAGE_FILE_MACHINE_SH4       = 0x1A6,
-    IMAGE_FILE_MACHINE_SH5       = 0x1A8,
-    IMAGE_FILE_MACHINE_THUMB     = 0x1C2,
-    IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169
-  };
-
-  enum Characteristics {
-    C_Invalid = 0,
-
-    /// The file does not contain base relocations and must be loaded at its
-    /// preferred base. If this cannot be done, the loader will error.
-    IMAGE_FILE_RELOCS_STRIPPED         = 0x0001,
-    /// The file is valid and can be run.
-    IMAGE_FILE_EXECUTABLE_IMAGE        = 0x0002,
-    /// COFF line numbers have been stripped. This is deprecated and should be
-    /// 0.
-    IMAGE_FILE_LINE_NUMS_STRIPPED      = 0x0004,
-    /// COFF symbol table entries for local symbols have been removed. This is
-    /// deprecated and should be 0.
-    IMAGE_FILE_LOCAL_SYMS_STRIPPED     = 0x0008,
-    /// Aggressively trim working set. This is deprecated and must be 0.
-    IMAGE_FILE_AGGRESSIVE_WS_TRIM      = 0x0010,
-    /// Image can handle > 2GiB addresses.
-    IMAGE_FILE_LARGE_ADDRESS_AWARE     = 0x0020,
-    /// Little endian: the LSB precedes the MSB in memory. This is deprecated
-    /// and should be 0.
-    IMAGE_FILE_BYTES_REVERSED_LO       = 0x0080,
-    /// Machine is based on a 32bit word architecture.
-    IMAGE_FILE_32BIT_MACHINE           = 0x0100,
-    /// Debugging info has been removed.
-    IMAGE_FILE_DEBUG_STRIPPED          = 0x0200,
-    /// If the image is on removable media, fully load it and copy it to swap.
-    IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400,
-    /// If the image is on network media, fully load it and copy it to swap.
-    IMAGE_FILE_NET_RUN_FROM_SWAP       = 0x0800,
-    /// The image file is a system file, not a user program.
-    IMAGE_FILE_SYSTEM                  = 0x1000,
-    /// The image file is a DLL.
-    IMAGE_FILE_DLL                     = 0x2000,
-    /// This file should only be run on a uniprocessor machine.
-    IMAGE_FILE_UP_SYSTEM_ONLY          = 0x4000,
-    /// Big endian: the MSB precedes the LSB in memory. This is deprecated
-    /// and should be 0.
-    IMAGE_FILE_BYTES_REVERSED_HI       = 0x8000
-  };
-
-  enum ResourceTypeID {
-    RID_Cursor = 1,
-    RID_Bitmap = 2,
-    RID_Icon = 3,
-    RID_Menu = 4,
-    RID_Dialog = 5,
-    RID_String = 6,
-    RID_FontDir = 7,
-    RID_Font = 8,
-    RID_Accelerator = 9,
-    RID_RCData = 10,
-    RID_MessageTable = 11,
-    RID_Group_Cursor = 12,
-    RID_Group_Icon = 14,
-    RID_Version = 16,
-    RID_DLGInclude = 17,
-    RID_PlugPlay = 19,
-    RID_VXD = 20,
-    RID_AniCursor = 21,
-    RID_AniIcon = 22,
-    RID_HTML = 23,
-    RID_Manifest = 24,
-  };
-
-  struct symbol {
-    char     Name[NameSize];
-    uint32_t Value;
-    int32_t  SectionNumber;
-    uint16_t Type;
-    uint8_t  StorageClass;
-    uint8_t  NumberOfAuxSymbols;
-  };
-
-  enum SymbolSectionNumber : int32_t {
-    IMAGE_SYM_DEBUG     = -2,
-    IMAGE_SYM_ABSOLUTE  = -1,
-    IMAGE_SYM_UNDEFINED = 0
-  };
-
-  /// Storage class tells where and what the symbol represents
-  enum SymbolStorageClass {
-    SSC_Invalid = 0xff,
-
-    IMAGE_SYM_CLASS_END_OF_FUNCTION  = -1,  ///< Physical end of function
-    IMAGE_SYM_CLASS_NULL             = 0,   ///< No symbol
-    IMAGE_SYM_CLASS_AUTOMATIC        = 1,   ///< Stack variable
-    IMAGE_SYM_CLASS_EXTERNAL         = 2,   ///< External symbol
-    IMAGE_SYM_CLASS_STATIC           = 3,   ///< Static
-    IMAGE_SYM_CLASS_REGISTER         = 4,   ///< Register variable
-    IMAGE_SYM_CLASS_EXTERNAL_DEF     = 5,   ///< External definition
-    IMAGE_SYM_CLASS_LABEL            = 6,   ///< Label
-    IMAGE_SYM_CLASS_UNDEFINED_LABEL  = 7,   ///< Undefined label
-    IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8,   ///< Member of structure
-    IMAGE_SYM_CLASS_ARGUMENT         = 9,   ///< Function argument
-    IMAGE_SYM_CLASS_STRUCT_TAG       = 10,  ///< Structure tag
-    IMAGE_SYM_CLASS_MEMBER_OF_UNION  = 11,  ///< Member of union
-    IMAGE_SYM_CLASS_UNION_TAG        = 12,  ///< Union tag
-    IMAGE_SYM_CLASS_TYPE_DEFINITION  = 13,  ///< Type definition
-    IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14,  ///< Undefined static
-    IMAGE_SYM_CLASS_ENUM_TAG         = 15,  ///< Enumeration tag
-    IMAGE_SYM_CLASS_MEMBER_OF_ENUM   = 16,  ///< Member of enumeration
-    IMAGE_SYM_CLASS_REGISTER_PARAM   = 17,  ///< Register parameter
-    IMAGE_SYM_CLASS_BIT_FIELD        = 18,  ///< Bit field
-    /// ".bb" or ".eb" - beginning or end of block
-    IMAGE_SYM_CLASS_BLOCK            = 100,
-    /// ".bf" or ".ef" - beginning or end of function
-    IMAGE_SYM_CLASS_FUNCTION         = 101,
-    IMAGE_SYM_CLASS_END_OF_STRUCT    = 102, ///< End of structure
-    IMAGE_SYM_CLASS_FILE             = 103, ///< File name
-    /// Line number, reformatted as symbol
-    IMAGE_SYM_CLASS_SECTION          = 104,
-    IMAGE_SYM_CLASS_WEAK_EXTERNAL    = 105, ///< Duplicate tag
-    /// External symbol in dmert public lib
-    IMAGE_SYM_CLASS_CLR_TOKEN        = 107
-  };
-
-  enum SymbolBaseType {
-    IMAGE_SYM_TYPE_NULL   = 0,  ///< No type information or unknown base type.
-    IMAGE_SYM_TYPE_VOID   = 1,  ///< Used with void pointers and functions.
-    IMAGE_SYM_TYPE_CHAR   = 2,  ///< A character (signed byte).
-    IMAGE_SYM_TYPE_SHORT  = 3,  ///< A 2-byte signed integer.
-    IMAGE_SYM_TYPE_INT    = 4,  ///< A natural integer type on the target.
-    IMAGE_SYM_TYPE_LONG   = 5,  ///< A 4-byte signed integer.
-    IMAGE_SYM_TYPE_FLOAT  = 6,  ///< A 4-byte floating-point number.
-    IMAGE_SYM_TYPE_DOUBLE = 7,  ///< An 8-byte floating-point number.
-    IMAGE_SYM_TYPE_STRUCT = 8,  ///< A structure.
-    IMAGE_SYM_TYPE_UNION  = 9,  ///< An union.
-    IMAGE_SYM_TYPE_ENUM   = 10, ///< An enumerated type.
-    IMAGE_SYM_TYPE_MOE    = 11, ///< A member of enumeration (a specific value).
-    IMAGE_SYM_TYPE_BYTE   = 12, ///< A byte; unsigned 1-byte integer.
-    IMAGE_SYM_TYPE_WORD   = 13, ///< A word; unsigned 2-byte integer.
-    IMAGE_SYM_TYPE_UINT   = 14, ///< An unsigned integer of natural size.
-    IMAGE_SYM_TYPE_DWORD  = 15  ///< An unsigned 4-byte integer.
-  };
-
-  enum SymbolComplexType {
-    IMAGE_SYM_DTYPE_NULL     = 0, ///< No complex type; simple scalar variable.
-    IMAGE_SYM_DTYPE_POINTER  = 1, ///< A pointer to base type.
-    IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type.
-    IMAGE_SYM_DTYPE_ARRAY    = 3, ///< An array of base type.
-
-    /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT))
-    SCT_COMPLEX_TYPE_SHIFT   = 4
-  };
-
-  enum AuxSymbolType {
-    IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1
-  };
-
-  struct section {
-    char     Name[NameSize];
-    uint32_t VirtualSize;
-    uint32_t VirtualAddress;
-    uint32_t SizeOfRawData;
-    uint32_t PointerToRawData;
-    uint32_t PointerToRelocations;
-    uint32_t PointerToLineNumbers;
-    uint16_t NumberOfRelocations;
-    uint16_t NumberOfLineNumbers;
-    uint32_t Characteristics;
-  };
-
-  enum SectionCharacteristics : uint32_t {
-    SC_Invalid = 0xffffffff,
-
-    IMAGE_SCN_TYPE_NOLOAD            = 0x00000002,
-    IMAGE_SCN_TYPE_NO_PAD            = 0x00000008,
-    IMAGE_SCN_CNT_CODE               = 0x00000020,
-    IMAGE_SCN_CNT_INITIALIZED_DATA   = 0x00000040,
-    IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080,
-    IMAGE_SCN_LNK_OTHER              = 0x00000100,
-    IMAGE_SCN_LNK_INFO               = 0x00000200,
-    IMAGE_SCN_LNK_REMOVE             = 0x00000800,
-    IMAGE_SCN_LNK_COMDAT             = 0x00001000,
-    IMAGE_SCN_GPREL                  = 0x00008000,
-    IMAGE_SCN_MEM_PURGEABLE          = 0x00020000,
-    IMAGE_SCN_MEM_16BIT              = 0x00020000,
-    IMAGE_SCN_MEM_LOCKED             = 0x00040000,
-    IMAGE_SCN_MEM_PRELOAD            = 0x00080000,
-    IMAGE_SCN_ALIGN_1BYTES           = 0x00100000,
-    IMAGE_SCN_ALIGN_2BYTES           = 0x00200000,
-    IMAGE_SCN_ALIGN_4BYTES           = 0x00300000,
-    IMAGE_SCN_ALIGN_8BYTES           = 0x00400000,
-    IMAGE_SCN_ALIGN_16BYTES          = 0x00500000,
-    IMAGE_SCN_ALIGN_32BYTES          = 0x00600000,
-    IMAGE_SCN_ALIGN_64BYTES          = 0x00700000,
-    IMAGE_SCN_ALIGN_128BYTES         = 0x00800000,
-    IMAGE_SCN_ALIGN_256BYTES         = 0x00900000,
-    IMAGE_SCN_ALIGN_512BYTES         = 0x00A00000,
-    IMAGE_SCN_ALIGN_1024BYTES        = 0x00B00000,
-    IMAGE_SCN_ALIGN_2048BYTES        = 0x00C00000,
-    IMAGE_SCN_ALIGN_4096BYTES        = 0x00D00000,
-    IMAGE_SCN_ALIGN_8192BYTES        = 0x00E00000,
-    IMAGE_SCN_LNK_NRELOC_OVFL        = 0x01000000,
-    IMAGE_SCN_MEM_DISCARDABLE        = 0x02000000,
-    IMAGE_SCN_MEM_NOT_CACHED         = 0x04000000,
-    IMAGE_SCN_MEM_NOT_PAGED          = 0x08000000,
-    IMAGE_SCN_MEM_SHARED             = 0x10000000,
-    IMAGE_SCN_MEM_EXECUTE            = 0x20000000,
-    IMAGE_SCN_MEM_READ               = 0x40000000,
-    IMAGE_SCN_MEM_WRITE              = 0x80000000
-  };
-
-  struct relocation {
-    uint32_t VirtualAddress;
-    uint32_t SymbolTableIndex;
-    uint16_t Type;
-  };
-
-  enum RelocationTypeI386 {
-    IMAGE_REL_I386_ABSOLUTE = 0x0000,
-    IMAGE_REL_I386_DIR16    = 0x0001,
-    IMAGE_REL_I386_REL16    = 0x0002,
-    IMAGE_REL_I386_DIR32    = 0x0006,
-    IMAGE_REL_I386_DIR32NB  = 0x0007,
-    IMAGE_REL_I386_SEG12    = 0x0009,
-    IMAGE_REL_I386_SECTION  = 0x000A,
-    IMAGE_REL_I386_SECREL   = 0x000B,
-    IMAGE_REL_I386_TOKEN    = 0x000C,
-    IMAGE_REL_I386_SECREL7  = 0x000D,
-    IMAGE_REL_I386_REL32    = 0x0014
-  };
-
-  enum RelocationTypeAMD64 {
-    IMAGE_REL_AMD64_ABSOLUTE  = 0x0000,
-    IMAGE_REL_AMD64_ADDR64    = 0x0001,
-    IMAGE_REL_AMD64_ADDR32    = 0x0002,
-    IMAGE_REL_AMD64_ADDR32NB  = 0x0003,
-    IMAGE_REL_AMD64_REL32     = 0x0004,
-    IMAGE_REL_AMD64_REL32_1   = 0x0005,
-    IMAGE_REL_AMD64_REL32_2   = 0x0006,
-    IMAGE_REL_AMD64_REL32_3   = 0x0007,
-    IMAGE_REL_AMD64_REL32_4   = 0x0008,
-    IMAGE_REL_AMD64_REL32_5   = 0x0009,
-    IMAGE_REL_AMD64_SECTION   = 0x000A,
-    IMAGE_REL_AMD64_SECREL    = 0x000B,
-    IMAGE_REL_AMD64_SECREL7   = 0x000C,
-    IMAGE_REL_AMD64_TOKEN     = 0x000D,
-    IMAGE_REL_AMD64_SREL32    = 0x000E,
-    IMAGE_REL_AMD64_PAIR      = 0x000F,
-    IMAGE_REL_AMD64_SSPAN32   = 0x0010
-  };
-
-  enum RelocationTypesARM {
-    IMAGE_REL_ARM_ABSOLUTE  = 0x0000,
-    IMAGE_REL_ARM_ADDR32    = 0x0001,
-    IMAGE_REL_ARM_ADDR32NB  = 0x0002,
-    IMAGE_REL_ARM_BRANCH24  = 0x0003,
-    IMAGE_REL_ARM_BRANCH11  = 0x0004,
-    IMAGE_REL_ARM_TOKEN     = 0x0005,
-    IMAGE_REL_ARM_BLX24     = 0x0008,
-    IMAGE_REL_ARM_BLX11     = 0x0009,
-    IMAGE_REL_ARM_SECTION   = 0x000E,
-    IMAGE_REL_ARM_SECREL    = 0x000F,
-    IMAGE_REL_ARM_MOV32A    = 0x0010,
-    IMAGE_REL_ARM_MOV32T    = 0x0011,
-    IMAGE_REL_ARM_BRANCH20T = 0x0012,
-    IMAGE_REL_ARM_BRANCH24T = 0x0014,
-    IMAGE_REL_ARM_BLX23T    = 0x0015
-  };
-
-  enum RelocationTypesARM64 {
-    IMAGE_REL_ARM64_ABSOLUTE        = 0x0000,
-    IMAGE_REL_ARM64_ADDR32          = 0x0001,
-    IMAGE_REL_ARM64_ADDR32NB        = 0x0002,
-    IMAGE_REL_ARM64_BRANCH26        = 0x0003,
-    IMAGE_REL_ARM64_PAGEBASE_REL2   = 0x0004,
-    IMAGE_REL_ARM64_REL21           = 0x0005,
-    IMAGE_REL_ARM64_PAGEOFFSET_12A  = 0x0006,
-    IMAGE_REL_ARM64_PAGEOFFSET_12L  = 0x0007,
-    IMAGE_REL_ARM64_SECREL          = 0x0008,
-    IMAGE_REL_ARM64_SECREL_LOW12A   = 0x0009,
-    IMAGE_REL_ARM64_SECREL_HIGH12A  = 0x000A,
-    IMAGE_REL_ARM64_SECREL_LOW12L   = 0x000B,
-    IMAGE_REL_ARM64_TOKEN           = 0x000C,
-    IMAGE_REL_ARM64_SECTION         = 0x000D,
-    IMAGE_REL_ARM64_ADDR64          = 0x000E,
-    IMAGE_REL_ARM64_BRANCH19        = 0x000F,
-    IMAGE_REL_ARM64_BRANCH14        = 0x0010,
-  };
-
-  enum COMDATType {
-    IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
-    IMAGE_COMDAT_SELECT_ANY,
-    IMAGE_COMDAT_SELECT_SAME_SIZE,
-    IMAGE_COMDAT_SELECT_EXACT_MATCH,
-    IMAGE_COMDAT_SELECT_ASSOCIATIVE,
-    IMAGE_COMDAT_SELECT_LARGEST,
-    IMAGE_COMDAT_SELECT_NEWEST
-  };
-
-  // Auxiliary Symbol Formats
-  struct AuxiliaryFunctionDefinition {
-    uint32_t TagIndex;
-    uint32_t TotalSize;
-    uint32_t PointerToLinenumber;
-    uint32_t PointerToNextFunction;
-    char     unused[2];
-  };
-
-  struct AuxiliarybfAndefSymbol {
-    uint8_t  unused1[4];
-    uint16_t Linenumber;
-    uint8_t  unused2[6];
-    uint32_t PointerToNextFunction;
-    uint8_t  unused3[2];
-  };
-
-  struct AuxiliaryWeakExternal {
-    uint32_t TagIndex;
-    uint32_t Characteristics;
-    uint8_t  unused[10];
-  };
-
-  enum WeakExternalCharacteristics {
-    IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1,
-    IMAGE_WEAK_EXTERN_SEARCH_LIBRARY   = 2,
-    IMAGE_WEAK_EXTERN_SEARCH_ALIAS     = 3
-  };
-
-  struct AuxiliarySectionDefinition {
-    uint32_t Length;
-    uint16_t NumberOfRelocations;
-    uint16_t NumberOfLinenumbers;
-    uint32_t CheckSum;
-    uint32_t Number;
-    uint8_t  Selection;
-    char     unused;
-  };
-
-  struct AuxiliaryCLRToken {
-    uint8_t  AuxType;
-    uint8_t  unused1;
-    uint32_t SymbolTableIndex;
-    char     unused2[12];
-  };
-
-  union Auxiliary {
-    AuxiliaryFunctionDefinition FunctionDefinition;
-    AuxiliarybfAndefSymbol      bfAndefSymbol;
-    AuxiliaryWeakExternal       WeakExternal;
-    AuxiliarySectionDefinition  SectionDefinition;
-  };
-
-  /// @brief The Import Directory Table.
-  ///
-  /// There is a single array of these and one entry per imported DLL.
-  struct ImportDirectoryTableEntry {
-    uint32_t ImportLookupTableRVA;
-    uint32_t TimeDateStamp;
-    uint32_t ForwarderChain;
-    uint32_t NameRVA;
-    uint32_t ImportAddressTableRVA;
-  };
-
-  /// @brief The PE32 Import Lookup Table.
-  ///
-  /// There is an array of these for each imported DLL. It represents either
-  /// the ordinal to import from the target DLL, or a name to lookup and import
-  /// from the target DLL.
-  ///
-  /// This also happens to be the same format used by the Import Address Table
-  /// when it is initially written out to the image.
-  struct ImportLookupTableEntry32 {
-    uint32_t data;
-
-    /// @brief Is this entry specified by ordinal, or name?
-    bool isOrdinal() const { return data & 0x80000000; }
-
-    /// @brief Get the ordinal value of this entry. isOrdinal must be true.
-    uint16_t getOrdinal() const {
-      assert(isOrdinal() && "ILT entry is not an ordinal!");
-      return data & 0xFFFF;
-    }
-
-    /// @brief Set the ordinal value and set isOrdinal to true.
-    void setOrdinal(uint16_t o) {
-      data = o;
-      data |= 0x80000000;
-    }
-
-    /// @brief Get the Hint/Name entry RVA. isOrdinal must be false.
-    uint32_t getHintNameRVA() const {
-      assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!");
-      return data;
-    }
-
-    /// @brief Set the Hint/Name entry RVA and set isOrdinal to false.
-    void setHintNameRVA(uint32_t rva) { data = rva; }
-  };
-
-  /// @brief The DOS compatible header at the front of all PEs.
-  struct DOSHeader {
-    uint16_t Magic;
-    uint16_t UsedBytesInTheLastPage;
-    uint16_t FileSizeInPages;
-    uint16_t NumberOfRelocationItems;
-    uint16_t HeaderSizeInParagraphs;
-    uint16_t MinimumExtraParagraphs;
-    uint16_t MaximumExtraParagraphs;
-    uint16_t InitialRelativeSS;
-    uint16_t InitialSP;
-    uint16_t Checksum;
-    uint16_t InitialIP;
-    uint16_t InitialRelativeCS;
-    uint16_t AddressOfRelocationTable;
-    uint16_t OverlayNumber;
-    uint16_t Reserved[4];
-    uint16_t OEMid;
-    uint16_t OEMinfo;
-    uint16_t Reserved2[10];
-    uint32_t AddressOfNewExeHeader;
-  };
-
-  struct PE32Header {
-    enum {
-      PE32 = 0x10b,
-      PE32_PLUS = 0x20b
-    };
-
-    uint16_t Magic;
-    uint8_t  MajorLinkerVersion;
-    uint8_t  MinorLinkerVersion;
-    uint32_t SizeOfCode;
-    uint32_t SizeOfInitializedData;
-    uint32_t SizeOfUninitializedData;
-    uint32_t AddressOfEntryPoint; // RVA
-    uint32_t BaseOfCode; // RVA
-    uint32_t BaseOfData; // RVA
-    uint32_t ImageBase;
-    uint32_t SectionAlignment;
-    uint32_t FileAlignment;
-    uint16_t MajorOperatingSystemVersion;
-    uint16_t MinorOperatingSystemVersion;
-    uint16_t MajorImageVersion;
-    uint16_t MinorImageVersion;
-    uint16_t MajorSubsystemVersion;
-    uint16_t MinorSubsystemVersion;
-    uint32_t Win32VersionValue;
-    uint32_t SizeOfImage;
-    uint32_t SizeOfHeaders;
-    uint32_t CheckSum;
-    uint16_t Subsystem;
-    // FIXME: This should be DllCharacteristics to match the COFF spec.
-    uint16_t DLLCharacteristics;
-    uint32_t SizeOfStackReserve;
-    uint32_t SizeOfStackCommit;
-    uint32_t SizeOfHeapReserve;
-    uint32_t SizeOfHeapCommit;
-    uint32_t LoaderFlags;
-    // FIXME: This should be NumberOfRvaAndSizes to match the COFF spec.
-    uint32_t NumberOfRvaAndSize;
-  };
-
-  struct DataDirectory {
-    uint32_t RelativeVirtualAddress;
-    uint32_t Size;
-  };
-
-  enum DataDirectoryIndex {
-    EXPORT_TABLE = 0,
-    IMPORT_TABLE,
-    RESOURCE_TABLE,
-    EXCEPTION_TABLE,
-    CERTIFICATE_TABLE,
-    BASE_RELOCATION_TABLE,
-    DEBUG_DIRECTORY,
-    ARCHITECTURE,
-    GLOBAL_PTR,
-    TLS_TABLE,
-    LOAD_CONFIG_TABLE,
-    BOUND_IMPORT,
-    IAT,
-    DELAY_IMPORT_DESCRIPTOR,
-    CLR_RUNTIME_HEADER,
-
-    NUM_DATA_DIRECTORIES
-  };
-
-  enum WindowsSubsystem {
-    IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem.
-    IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes
-    IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem.
-    IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem.
-    IMAGE_SUBSYSTEM_OS2_CUI = 5, ///< The OS/2 character subsytem.
-    IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem.
-    IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, ///< Native Windows 9x driver.
-    IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE.
-    IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application.
-    IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot
-                                                  ///  services.
-    IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time
-                                             ///  services.
-    IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image.
-    IMAGE_SUBSYSTEM_XBOX = 14, ///< XBOX.
-    IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application.
-  };
-
-  enum DLLCharacteristics {
-    /// ASLR with 64 bit address space.
-    IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020,
-    /// DLL can be relocated at load time.
-    IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040,
-    /// Code integrity checks are enforced.
-    IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080,
-    ///< Image is NX compatible.
-    IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100,
-    /// Isolation aware, but do not isolate the image.
-    IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200,
-    /// Does not use structured exception handling (SEH). No SEH handler may be
-    /// called in this image.
-    IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400,
-    /// Do not bind the image.
-    IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800,
-    ///< Image should execute in an AppContainer.
-    IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000,
-    ///< A WDM driver.
-    IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000,
-    ///< Image supports Control Flow Guard.
-    IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000,
-    /// Terminal Server aware.
-    IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000
-  };
-
-  enum DebugType {
-    IMAGE_DEBUG_TYPE_UNKNOWN       = 0,
-    IMAGE_DEBUG_TYPE_COFF          = 1,
-    IMAGE_DEBUG_TYPE_CODEVIEW      = 2,
-    IMAGE_DEBUG_TYPE_FPO           = 3,
-    IMAGE_DEBUG_TYPE_MISC          = 4,
-    IMAGE_DEBUG_TYPE_EXCEPTION     = 5,
-    IMAGE_DEBUG_TYPE_FIXUP         = 6,
-    IMAGE_DEBUG_TYPE_OMAP_TO_SRC   = 7,
-    IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8,
-    IMAGE_DEBUG_TYPE_BORLAND       = 9,
-    IMAGE_DEBUG_TYPE_RESERVED10    = 10,
-    IMAGE_DEBUG_TYPE_CLSID         = 11,
-    IMAGE_DEBUG_TYPE_VC_FEATURE    = 12,
-    IMAGE_DEBUG_TYPE_POGO          = 13,
-    IMAGE_DEBUG_TYPE_ILTCG         = 14,
-    IMAGE_DEBUG_TYPE_MPX           = 15,
-    IMAGE_DEBUG_TYPE_REPRO         = 16,
-  };
-
-  enum BaseRelocationType {
-    IMAGE_REL_BASED_ABSOLUTE       = 0,
-    IMAGE_REL_BASED_HIGH           = 1,
-    IMAGE_REL_BASED_LOW            = 2,
-    IMAGE_REL_BASED_HIGHLOW        = 3,
-    IMAGE_REL_BASED_HIGHADJ        = 4,
-    IMAGE_REL_BASED_MIPS_JMPADDR   = 5,
-    IMAGE_REL_BASED_ARM_MOV32A     = 5,
-    IMAGE_REL_BASED_ARM_MOV32T     = 7,
-    IMAGE_REL_BASED_MIPS_JMPADDR16 = 9,
-    IMAGE_REL_BASED_DIR64          = 10
-  };
-
-  enum ImportType {
-    IMPORT_CODE  = 0,
-    IMPORT_DATA  = 1,
-    IMPORT_CONST = 2
-  };
-
-  enum ImportNameType {
-    /// Import is by ordinal. This indicates that the value in the Ordinal/Hint
-    /// field of the import header is the import's ordinal. If this constant is
-    /// not specified, then the Ordinal/Hint field should always be interpreted
-    /// as the import's hint.
-    IMPORT_ORDINAL         = 0,
-    /// The import name is identical to the public symbol name
-    IMPORT_NAME            = 1,
-    /// The import name is the public symbol name, but skipping the leading ?,
-    /// @, or optionally _.
-    IMPORT_NAME_NOPREFIX   = 2,
-    /// The import name is the public symbol name, but skipping the leading ?,
-    /// @, or optionally _, and truncating at the first @.
-    IMPORT_NAME_UNDECORATE = 3
-  };
-
-  struct ImportHeader {
-    uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0).
-    uint16_t Sig2; ///< Must be 0xFFFF.
-    uint16_t Version;
-    uint16_t Machine;
-    uint32_t TimeDateStamp;
-    uint32_t SizeOfData;
-    uint16_t OrdinalHint;
-    uint16_t TypeInfo;
-
-    ImportType getType() const {
-      return static_cast<ImportType>(TypeInfo & 0x3);
-    }
-
-    ImportNameType getNameType() const {
-      return static_cast<ImportNameType>((TypeInfo & 0x1C) >> 2);
-    }
-  };
-
-  enum CodeViewIdentifiers {
-    DEBUG_SECTION_MAGIC = 0x4,
-  };
-
-  inline bool isReservedSectionNumber(int32_t SectionNumber) {
-    return SectionNumber <= 0;
-  }
-
-} // End namespace COFF.
-} // End namespace llvm.
-
-#endif
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index 89d2af052dc1..baa2a814e9a1 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -1,4 +1,4 @@
-//===-- llvm/Support/Casting.h - Allow flexible, checked, casts -*- C++ -*-===//
+//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,6 +19,7 @@
 #include "llvm/Support/type_traits.h"
 #include <cassert>
 #include <memory>
+#include <type_traits>
 
 namespace llvm {
 
@@ -31,18 +32,19 @@ namespace llvm {
 // template selection process...  the default implementation is a noop.
 //
 template<typename From> struct simplify_type {
-  typedef       From SimpleType;        // The real type this represents...
+  using SimpleType = From; // The real type this represents...
 
   // An accessor to get the real value...
   static SimpleType &getSimplifiedValue(From &Val) { return Val; }
 };
 
 template<typename From> struct simplify_type<const From> {
-  typedef typename simplify_type<From>::SimpleType NonConstSimpleType;
-  typedef typename add_const_past_pointer<NonConstSimpleType>::type
-    SimpleType;
-  typedef typename add_lvalue_reference_if_not_pointer<SimpleType>::type
-    RetType;
+  using NonConstSimpleType = typename simplify_type<From>::SimpleType;
+  using SimpleType =
+      typename add_const_past_pointer<NonConstSimpleType>::type;
+  using RetType =
+      typename add_lvalue_reference_if_not_pointer<SimpleType>::type;
+
   static RetType getSimplifiedValue(const From& Val) {
     return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val));
   }
@@ -148,36 +150,35 @@ template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
 
 template<class To, class From> struct cast_retty;
 
-
 // Calculate what type the 'cast' function should return, based on a requested
 // type of To and a source type of From.
 template<class To, class From> struct cast_retty_impl {
-  typedef To& ret_type;         // Normal case, return Ty&
+  using ret_type = To &;       // Normal case, return Ty&
 };
 template<class To, class From> struct cast_retty_impl<To, const From> {
-  typedef const To &ret_type;   // Normal case, return Ty&
+  using ret_type = const To &; // Normal case, return Ty&
 };
 
 template<class To, class From> struct cast_retty_impl<To, From*> {
-  typedef To* ret_type;         // Pointer arg case, return Ty*
+  using ret_type = To *;       // Pointer arg case, return Ty*
 };
 
 template<class To, class From> struct cast_retty_impl<To, const From*> {
-  typedef const To* ret_type;   // Constant pointer arg case, return const Ty*
+  using ret_type = const To *; // Constant pointer arg case, return const Ty*
 };
 
 template<class To, class From> struct cast_retty_impl<To, const From*const> {
-  typedef const To* ret_type;   // Constant pointer arg case, return const Ty*
+  using ret_type = const To *; // Constant pointer arg case, return const Ty*
 };
 
 template <class To, class From>
 struct cast_retty_impl<To, std::unique_ptr<From>> {
 private:
-  typedef typename cast_retty_impl<To, From *>::ret_type PointerType;
-  typedef typename std::remove_pointer<PointerType>::type ResultType;
+  using PointerType = typename cast_retty_impl<To, From *>::ret_type;
+  using ResultType = typename std::remove_pointer<PointerType>::type;
 
 public:
-  typedef std::unique_ptr<ResultType> ret_type;
+  using ret_type = std::unique_ptr<ResultType>;
 };
 
 template<class To, class From, class SimpleFrom>
@@ -185,19 +186,19 @@ struct cast_retty_wrap {
   // When the simplified type and the from type are not the same, use the type
   // simplifier to reduce the type, then reuse cast_retty_impl to get the
   // resultant type.
-  typedef typename cast_retty<To, SimpleFrom>::ret_type ret_type;
+  using ret_type = typename cast_retty<To, SimpleFrom>::ret_type;
 };
 
 template<class To, class FromTy>
 struct cast_retty_wrap<To, FromTy, FromTy> {
   // When the simplified type is equal to the from type, use it directly.
-  typedef typename cast_retty_impl<To,FromTy>::ret_type ret_type;
+  using ret_type = typename cast_retty_impl<To,FromTy>::ret_type;
 };
 
 template<class To, class From>
 struct cast_retty {
-  typedef typename cast_retty_wrap<To, From,
-                   typename simplify_type<From>::SimpleType>::ret_type ret_type;
+  using ret_type = typename cast_retty_wrap<
+      To, From, typename simplify_type<From>::SimpleType>::ret_type;
 };
 
 // Ensure the non-simple values are converted using the simplify_type template
@@ -393,6 +394,6 @@ LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr<Y> &&Val)
   return unique_dyn_cast_or_null<X, Y>(Val);
 }
 
-} // End llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_SUPPORT_CASTING_H
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index ae32e20d6dab..771b0a8c26a9 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -21,18 +21,19 @@
 #define LLVM_SUPPORT_COMMANDLINE_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include <cassert>
 #include <climits>
 #include <cstddef>
+#include <functional>
 #include <initializer_list>
 #include <string>
 #include <type_traits>
@@ -41,6 +42,7 @@
 namespace llvm {
 
 class StringSaver;
+class raw_ostream;
 
 /// cl Namespace - This namespace contains all of the command line option
 /// processing machinery.  It is intentionally a short name to make qualified
@@ -64,12 +66,15 @@ bool ParseCommandLineOptions(int argc, const char *const *argv,
 void ParseEnvironmentOptions(const char *progName, const char *envvar,
                              const char *Overview = "");
 
+// Callable type (a std::function) for printing version information to a stream.
+using VersionPrinterTy = std::function<void(raw_ostream &)>;
+
 ///===---------------------------------------------------------------------===//
 /// SetVersionPrinter - Override the default (LLVM specific) version printer
 ///                     used to print out the version when --version is given
 ///                     on the command line. This allows other systems using the
 ///                     CommandLine utilities to print their own version string.
-void SetVersionPrinter(void (*func)());
+void SetVersionPrinter(VersionPrinterTy func);
 
 ///===---------------------------------------------------------------------===//
 /// AddExtraVersionPrinter - Add an extra printer to use in addition to the
@@ -78,7 +83,7 @@ void SetVersionPrinter(void (*func)());
 ///                          which will be called after the basic LLVM version
 ///                          printing is complete. Each can then add additional
 ///                          information specific to the tool.
-void AddExtraVersionPrinter(void (*func)());
+void AddExtraVersionPrinter(VersionPrinterTy func);
 
 // PrintOptionValues - Print option values.
 // With -print-options print the difference between option values and defaults.
@@ -242,7 +247,7 @@ class Option {
   // Out of line virtual function to provide home for the class.
   virtual void anchor();
 
-  int NumOccurrences; // The number of times specified
+  int NumOccurrences = 0; // The number of times specified
   // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid
   // problems with signed enums in bitfields.
   unsigned Occurrences : 3; // enum NumOccurrencesFlag
@@ -252,8 +257,8 @@ class Option {
   unsigned HiddenFlag : 2; // enum OptionHidden
   unsigned Formatting : 2; // enum FormattingFlags
   unsigned Misc : 3;
-  unsigned Position;       // Position of last occurrence of the option
-  unsigned AdditionalVals; // Greater than 0 for multi-valued option.
+  unsigned Position = 0;       // Position of last occurrence of the option
+  unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option.
 
 public:
   StringRef ArgStr;   // The argument string itself (ex: "help", "o")
@@ -261,7 +266,7 @@ public:
   StringRef ValueStr; // String describing what the value of this option is
   OptionCategory *Category; // The Category this option belongs to
   SmallPtrSet<SubCommand *, 4> Subs; // The subcommands this option belongs to.
-  bool FullyInitialized;    // Has addArguemnt been called?
+  bool FullyInitialized = false; // Has addArgument been called?
 
   inline enum NumOccurrencesFlag getNumOccurrencesFlag() const {
     return (enum NumOccurrencesFlag)Occurrences;
@@ -316,10 +321,8 @@ public:
 protected:
   explicit Option(enum NumOccurrencesFlag OccurrencesFlag,
                   enum OptionHidden Hidden)
-      : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0),
-        HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0), Position(0),
-        AdditionalVals(0), Category(&GeneralCategory), FullyInitialized(false) {
-  }
+      : Occurrences(OccurrencesFlag), Value(0), HiddenFlag(Hidden),
+        Formatting(NormalFormatting), Misc(0), Category(&GeneralCategory) {}
 
   inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; }
 
@@ -447,8 +450,8 @@ struct GenericOptionValue {
 protected:
   GenericOptionValue() = default;
   GenericOptionValue(const GenericOptionValue&) = default;
-  ~GenericOptionValue() = default;
   GenericOptionValue &operator=(const GenericOptionValue &) = default;
+  ~GenericOptionValue() = default;
 
 private:
   virtual void anchor();
@@ -461,7 +464,7 @@ template <class DataType> struct OptionValue;
 template <class DataType, bool isClass>
 struct OptionValueBase : public GenericOptionValue {
   // Temporary storage for argument passing.
-  typedef OptionValue<DataType> WrapperType;
+  using WrapperType = OptionValue<DataType>;
 
   bool hasValue() const { return false; }
 
@@ -487,8 +490,8 @@ template <class DataType> class OptionValueCopy : public GenericOptionValue {
 
 protected:
   OptionValueCopy(const OptionValueCopy&) = default;
+  OptionValueCopy &operator=(const OptionValueCopy &) = default;
   ~OptionValueCopy() = default;
-  OptionValueCopy &operator=(const OptionValueCopy&) = default;
 
 public:
   OptionValueCopy() = default;
@@ -519,13 +522,13 @@ public:
 // Non-class option values.
 template <class DataType>
 struct OptionValueBase<DataType, false> : OptionValueCopy<DataType> {
-  typedef DataType WrapperType;
+  using WrapperType = DataType;
 
 protected:
   OptionValueBase() = default;
   OptionValueBase(const OptionValueBase&) = default;
+  OptionValueBase &operator=(const OptionValueBase &) = default;
   ~OptionValueBase() = default;
-  OptionValueBase &operator=(const OptionValueBase&) = default;
 };
 
 // Top-level option class.
@@ -548,7 +551,7 @@ enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };
 template <>
 struct OptionValue<cl::boolOrDefault> final
     : OptionValueCopy<cl::boolOrDefault> {
-  typedef cl::boolOrDefault WrapperType;
+  using WrapperType = cl::boolOrDefault;
 
   OptionValue() = default;
 
@@ -565,7 +568,7 @@ private:
 
 template <>
 struct OptionValue<std::string> final : OptionValueCopy<std::string> {
-  typedef StringRef WrapperType;
+  using WrapperType = StringRef;
 
   OptionValue() = default;
 
@@ -736,13 +739,15 @@ protected:
   public:
     OptionInfo(StringRef name, DataType v, StringRef helpStr)
         : GenericOptionInfo(name, helpStr), V(v) {}
+
     OptionValue<DataType> V;
   };
   SmallVector<OptionInfo, 8> Values;
 
 public:
   parser(Option &O) : generic_parser_base(O) {}
-  typedef DataType parser_data_type;
+
+  using parser_data_type = DataType;
 
   // Implement virtual functions needed by generic_parser_base
   unsigned getNumOptions() const override { return unsigned(Values.size()); }
@@ -837,10 +842,10 @@ protected:
 //
 template <class DataType> class basic_parser : public basic_parser_impl {
 public:
-  basic_parser(Option &O) : basic_parser_impl(O) {}
+  using parser_data_type = DataType;
+  using OptVal = OptionValue<DataType>;
 
-  typedef DataType parser_data_type;
-  typedef OptionValue<DataType> OptVal;
+  basic_parser(Option &O) : basic_parser_impl(O) {}
 
 protected:
   ~basic_parser() = default;
@@ -1292,6 +1297,7 @@ class opt : public Option,
   enum ValueExpected getValueExpectedFlagDefault() const override {
     return Parser.getValueExpectedFlagDefault();
   }
+
   void getExtraOptionNames(SmallVectorImpl<StringRef> &OptionNames) override {
     return Parser.getExtraOptionNames(OptionNames);
   }
@@ -1300,6 +1306,7 @@ class opt : public Option,
   size_t getOptionWidth() const override {
     return Parser.getOptionWidth(*this);
   }
+
   void printOptionInfo(size_t GlobalWidth) const override {
     Parser.printOptionInfo(*this, GlobalWidth);
   }
@@ -1384,16 +1391,18 @@ template <class DataType> class list_storage<DataType, bool> {
   std::vector<DataType> Storage;
 
 public:
-  typedef typename std::vector<DataType>::iterator iterator;
+  using iterator = typename std::vector<DataType>::iterator;
 
   iterator begin() { return Storage.begin(); }
   iterator end() { return Storage.end(); }
 
-  typedef typename std::vector<DataType>::const_iterator const_iterator;
+  using const_iterator = typename std::vector<DataType>::const_iterator;
+
   const_iterator begin() const { return Storage.begin(); }
   const_iterator end() const { return Storage.end(); }
 
-  typedef typename std::vector<DataType>::size_type size_type;
+  using size_type = typename std::vector<DataType>::size_type;
+
   size_type size() const { return Storage.size(); }
 
   bool empty() const { return Storage.empty(); }
@@ -1401,8 +1410,9 @@ public:
   void push_back(const DataType &value) { Storage.push_back(value); }
   void push_back(DataType &&value) { Storage.push_back(value); }
 
-  typedef typename std::vector<DataType>::reference reference;
-  typedef typename std::vector<DataType>::const_reference const_reference;
+  using reference = typename std::vector<DataType>::reference;
+  using const_reference = typename std::vector<DataType>::const_reference;
+
   reference operator[](size_type pos) { return Storage[pos]; }
   const_reference operator[](size_type pos) const { return Storage[pos]; }
 
@@ -1453,6 +1463,7 @@ class list : public Option, public list_storage<DataType, StorageClass> {
   enum ValueExpected getValueExpectedFlagDefault() const override {
     return Parser.getValueExpectedFlagDefault();
   }
+
   void getExtraOptionNames(SmallVectorImpl<StringRef> &OptionNames) override {
     return Parser.getExtraOptionNames(OptionNames);
   }
@@ -1473,6 +1484,7 @@ class list : public Option, public list_storage<DataType, StorageClass> {
   size_t getOptionWidth() const override {
     return Parser.getOptionWidth(*this);
   }
+
   void printOptionInfo(size_t GlobalWidth) const override {
     Parser.printOptionInfo(*this, GlobalWidth);
   }
@@ -1592,6 +1604,7 @@ class bits : public Option, public bits_storage<DataType, Storage> {
   enum ValueExpected getValueExpectedFlagDefault() const override {
     return Parser.getValueExpectedFlagDefault();
   }
+
   void getExtraOptionNames(SmallVectorImpl<StringRef> &OptionNames) override {
     return Parser.getExtraOptionNames(OptionNames);
   }
@@ -1612,6 +1625,7 @@ class bits : public Option, public bits_storage<DataType, Storage> {
   size_t getOptionWidth() const override {
     return Parser.getOptionWidth(*this);
   }
+
   void printOptionInfo(size_t GlobalWidth) const override {
     Parser.printOptionInfo(*this, GlobalWidth);
   }
@@ -1824,9 +1838,9 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver,
 
 /// \brief String tokenization function type.  Should be compatible with either
 /// Windows or Unix command line tokenizers.
-typedef void (*TokenizerCallback)(StringRef Source, StringSaver &Saver,
-                                  SmallVectorImpl<const char *> &NewArgv,
-                                  bool MarkEOLs);
+using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver,
+                                   SmallVectorImpl<const char *> &NewArgv,
+                                   bool MarkEOLs);
 
 /// \brief Expand response files on a command line recursively using the given
 /// StringSaver and tokenization strategy.  Argv should contain the command line
@@ -1880,6 +1894,7 @@ void ResetAllOptionOccurrences();
 void ResetCommandLineParser();
 
 } // end namespace cl
+
 } // end namespace llvm
 
 #endif // LLVM_SUPPORT_COMMANDLINE_H
diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h
index f714c0ed997e..bd439f360216 100644
--- a/include/llvm/Support/ConvertUTF.h
+++ b/include/llvm/Support/ConvertUTF.h
@@ -90,8 +90,8 @@
 #ifndef LLVM_SUPPORT_CONVERTUTF_H
 #define LLVM_SUPPORT_CONVERTUTF_H
 
-#include <string>
 #include <cstddef>
+#include <string>
 
 // Wrap everything in namespace llvm so that programs can link with llvm and
 // their own version of the unicode libraries.
diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake
index 541dbc3d635d..a58e2e454b7d 100644
--- a/include/llvm/Support/DataTypes.h.cmake
+++ b/include/llvm/Support/DataTypes.h.cmake
@@ -85,11 +85,11 @@ typedef u_int64_t uint64_t;
 
 #else /* _MSC_VER */
 #ifdef __cplusplus
-#include <cstdlib>
 #include <cstddef>
+#include <cstdlib>
 #else
-#include <stdlib.h>
 #include <stddef.h>
+#include <stdlib.h>
 #endif
 #include <sys/types.h>
 
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index 06e089ffa166..f50d9b502daf 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -14,27 +14,36 @@
 #ifndef LLVM_SUPPORT_ENDIAN_H
 #define LLVM_SUPPORT_ENDIAN_H
 
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/SwapByteOrder.h"
-
-#include <stdint.h>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <type_traits>
 
 namespace llvm {
 namespace support {
+
 enum endianness {big, little, native};
 
 // These are named values for common alignments.
 enum {aligned = 0, unaligned = 1};
 
 namespace detail {
-  /// \brief ::value is either alignment, or alignof(T) if alignment is 0.
-  template<class T, int alignment>
-  struct PickAlignment {
-    enum { value = alignment == 0 ? alignof(T) : alignment };
-  };
+
+/// \brief ::value is either alignment, or alignof(T) if alignment is 0.
+template<class T, int alignment>
+struct PickAlignment {
+ enum { value = alignment == 0 ? alignof(T) : alignment };
+};
+
 } // end namespace detail
 
 namespace endian {
+
 constexpr endianness system_endianness() {
   return sys::IsBigEndianHost ? big : little;
 }
@@ -190,9 +199,11 @@ inline void writeAtBitAlignment(void *memory, value_type value,
            &val[0], sizeof(value_type) * 2);
   }
 }
+
 } // end namespace endian
 
 namespace detail {
+
 template<typename value_type,
          endianness endian,
          std::size_t alignment>
@@ -254,77 +265,78 @@ public:
 
 } // end namespace detail
 
-typedef detail::packed_endian_specific_integral
-                  <uint16_t, little, unaligned> ulittle16_t;
-typedef detail::packed_endian_specific_integral
-                  <uint32_t, little, unaligned> ulittle32_t;
-typedef detail::packed_endian_specific_integral
-                  <uint64_t, little, unaligned> ulittle64_t;
+using ulittle16_t =
+    detail::packed_endian_specific_integral<uint16_t, little, unaligned>;
+using ulittle32_t =
+    detail::packed_endian_specific_integral<uint32_t, little, unaligned>;
+using ulittle64_t =
+    detail::packed_endian_specific_integral<uint64_t, little, unaligned>;
 
-typedef detail::packed_endian_specific_integral
-                   <int16_t, little, unaligned> little16_t;
-typedef detail::packed_endian_specific_integral
-                   <int32_t, little, unaligned> little32_t;
-typedef detail::packed_endian_specific_integral
-                   <int64_t, little, unaligned> little64_t;
+using little16_t =
+    detail::packed_endian_specific_integral<int16_t, little, unaligned>;
+using little32_t =
+    detail::packed_endian_specific_integral<int32_t, little, unaligned>;
+using little64_t =
+    detail::packed_endian_specific_integral<int64_t, little, unaligned>;
 
-typedef detail::packed_endian_specific_integral
-                    <uint16_t, little, aligned> aligned_ulittle16_t;
-typedef detail::packed_endian_specific_integral
-                    <uint32_t, little, aligned> aligned_ulittle32_t;
-typedef detail::packed_endian_specific_integral
-                    <uint64_t, little, aligned> aligned_ulittle64_t;
+using aligned_ulittle16_t =
+    detail::packed_endian_specific_integral<uint16_t, little, aligned>;
+using aligned_ulittle32_t =
+    detail::packed_endian_specific_integral<uint32_t, little, aligned>;
+using aligned_ulittle64_t =
+    detail::packed_endian_specific_integral<uint64_t, little, aligned>;
 
-typedef detail::packed_endian_specific_integral
-                     <int16_t, little, aligned> aligned_little16_t;
-typedef detail::packed_endian_specific_integral
-                     <int32_t, little, aligned> aligned_little32_t;
-typedef detail::packed_endian_specific_integral
-                     <int64_t, little, aligned> aligned_little64_t;
+using aligned_little16_t =
+    detail::packed_endian_specific_integral<int16_t, little, aligned>;
+using aligned_little32_t =
+    detail::packed_endian_specific_integral<int32_t, little, aligned>;
+using aligned_little64_t =
+    detail::packed_endian_specific_integral<int64_t, little, aligned>;
 
-typedef detail::packed_endian_specific_integral
-                  <uint16_t, big, unaligned>    ubig16_t;
-typedef detail::packed_endian_specific_integral
-                  <uint32_t, big, unaligned>    ubig32_t;
-typedef detail::packed_endian_specific_integral
-                  <uint64_t, big, unaligned>    ubig64_t;
+using ubig16_t =
+    detail::packed_endian_specific_integral<uint16_t, big, unaligned>;
+using ubig32_t =
+    detail::packed_endian_specific_integral<uint32_t, big, unaligned>;
+using ubig64_t =
+    detail::packed_endian_specific_integral<uint64_t, big, unaligned>;
 
-typedef detail::packed_endian_specific_integral
-                   <int16_t, big, unaligned>    big16_t;
-typedef detail::packed_endian_specific_integral
-                   <int32_t, big, unaligned>    big32_t;
-typedef detail::packed_endian_specific_integral
-                   <int64_t, big, unaligned>    big64_t;
+using big16_t =
+    detail::packed_endian_specific_integral<int16_t, big, unaligned>;
+using big32_t =
+    detail::packed_endian_specific_integral<int32_t, big, unaligned>;
+using big64_t =
+    detail::packed_endian_specific_integral<int64_t, big, unaligned>;
 
-typedef detail::packed_endian_specific_integral
-                    <uint16_t, big, aligned>    aligned_ubig16_t;
-typedef detail::packed_endian_specific_integral
-                    <uint32_t, big, aligned>    aligned_ubig32_t;
-typedef detail::packed_endian_specific_integral
-                    <uint64_t, big, aligned>    aligned_ubig64_t;
+using aligned_ubig16_t =
+    detail::packed_endian_specific_integral<uint16_t, big, aligned>;
+using aligned_ubig32_t =
+    detail::packed_endian_specific_integral<uint32_t, big, aligned>;
+using aligned_ubig64_t =
+    detail::packed_endian_specific_integral<uint64_t, big, aligned>;
 
-typedef detail::packed_endian_specific_integral
-                     <int16_t, big, aligned>    aligned_big16_t;
-typedef detail::packed_endian_specific_integral
-                     <int32_t, big, aligned>    aligned_big32_t;
-typedef detail::packed_endian_specific_integral
-                     <int64_t, big, aligned>    aligned_big64_t;
+using aligned_big16_t =
+    detail::packed_endian_specific_integral<int16_t, big, aligned>;
+using aligned_big32_t =
+    detail::packed_endian_specific_integral<int32_t, big, aligned>;
+using aligned_big64_t =
+    detail::packed_endian_specific_integral<int64_t, big, aligned>;
 
-typedef detail::packed_endian_specific_integral
-                  <uint16_t, native, unaligned> unaligned_uint16_t;
-typedef detail::packed_endian_specific_integral
-                  <uint32_t, native, unaligned> unaligned_uint32_t;
-typedef detail::packed_endian_specific_integral
-                  <uint64_t, native, unaligned> unaligned_uint64_t;
+using unaligned_uint16_t =
+    detail::packed_endian_specific_integral<uint16_t, native, unaligned>;
+using unaligned_uint32_t =
+    detail::packed_endian_specific_integral<uint32_t, native, unaligned>;
+using unaligned_uint64_t =
+    detail::packed_endian_specific_integral<uint64_t, native, unaligned>;
 
-typedef detail::packed_endian_specific_integral
-                   <int16_t, native, unaligned> unaligned_int16_t;
-typedef detail::packed_endian_specific_integral
-                   <int32_t, native, unaligned> unaligned_int32_t;
-typedef detail::packed_endian_specific_integral
-                   <int64_t, native, unaligned> unaligned_int64_t;
+using unaligned_int16_t =
+    detail::packed_endian_specific_integral<int16_t, native, unaligned>;
+using unaligned_int32_t =
+    detail::packed_endian_specific_integral<int32_t, native, unaligned>;
+using unaligned_int64_t =
+    detail::packed_endian_specific_integral<int64_t, native, unaligned>;
 
 namespace endian {
+
 template <typename T> inline T read(const void *P, endianness E) {
   return read<T, unaligned>(P, E);
 }
@@ -394,8 +406,10 @@ inline void write64le(void *P, uint64_t V) { write64<little>(P, V); }
 inline void write16be(void *P, uint16_t V) { write16<big>(P, V); }
 inline void write32be(void *P, uint32_t V) { write32<big>(P, V); }
 inline void write64be(void *P, uint64_t V) { write64<big>(P, V); }
+
 } // end namespace endian
+
 } // end namespace support
 } // end namespace llvm
 
-#endif
+#endif // LLVM_SUPPORT_ENDIAN_H
diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h
index a3482f5a58b5..1e27e0b821f0 100644
--- a/include/llvm/Support/Error.h
+++ b/include/llvm/Support/Error.h
@@ -1,4 +1,4 @@
-//===----- llvm/Support/Error.h - Recoverable error handling ----*- C++ -*-===//
+//===- llvm/Support/Error.h - Recoverable error handling --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -22,6 +22,7 @@
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -167,7 +168,7 @@ class LLVM_NODISCARD Error {
 
 protected:
   /// Create a success value. Prefer using 'Error::success()' for readability
-  Error() : Payload(nullptr) {
+  Error() {
     setPtr(nullptr);
     setChecked(false);
   }
@@ -182,7 +183,7 @@ public:
   /// Move-construct an error value. The newly constructed error is considered
   /// unchecked, even if the source error had been checked. The original error
   /// becomes a checked Success value, regardless of its original state.
-  Error(Error &&Other) : Payload(nullptr) {
+  Error(Error &&Other) {
     setChecked(true);
     *this = std::move(Other);
   }
@@ -299,7 +300,7 @@ private:
     return Tmp;
   }
 
-  ErrorInfoBase *Payload;
+  ErrorInfoBase *Payload = nullptr;
 };
 
 /// Subclass of Error for the sole purpose of identifying the success path in
@@ -327,7 +328,6 @@ template <typename ErrT, typename... ArgTs> Error make_error(ArgTs &&... Args) {
 template <typename ThisErrT, typename ParentErrT = ErrorInfoBase>
 class ErrorInfo : public ParentErrT {
 public:
-
   static const void *classID() { return &ThisErrT::ID; }
 
   const void *dynamicClassID() const override { return &ThisErrT::ID; }
@@ -645,20 +645,22 @@ private:
 template <class T> class LLVM_NODISCARD Expected {
   template <class T1> friend class ExpectedAsOutParameter;
   template <class OtherT> friend class Expected;
-  static const bool isRef = std::is_reference<T>::value;
-  typedef ReferenceStorage<typename std::remove_reference<T>::type> wrap;
 
-  typedef std::unique_ptr<ErrorInfoBase> error_type;
+  static const bool isRef = std::is_reference<T>::value;
+
+  using wrap = ReferenceStorage<typename std::remove_reference<T>::type>;
+
+  using error_type = std::unique_ptr<ErrorInfoBase>;
 
 public:
-  typedef typename std::conditional<isRef, wrap, T>::type storage_type;
-  typedef T value_type;
+  using storage_type = typename std::conditional<isRef, wrap, T>::type;
+  using value_type = T;
 
 private:
-  typedef typename std::remove_reference<T>::type &reference;
-  typedef const typename std::remove_reference<T>::type &const_reference;
-  typedef typename std::remove_reference<T>::type *pointer;
-  typedef const typename std::remove_reference<T>::type *const_pointer;
+  using reference = typename std::remove_reference<T>::type &;
+  using const_reference = const typename std::remove_reference<T>::type &;
+  using pointer = typename std::remove_reference<T>::type *;
+  using const_pointer = const typename std::remove_reference<T>::type *;
 
 public:
   /// Create an Expected<T> error value from the given Error.
@@ -891,7 +893,6 @@ private:
 template <typename T>
 class ExpectedAsOutParameter {
 public:
-
   ExpectedAsOutParameter(Expected<T> *ValOrErr)
     : ValOrErr(ValOrErr) {
     if (ValOrErr)
diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h
index 877f4063cd23..061fb65db465 100644
--- a/include/llvm/Support/ErrorOr.h
+++ b/include/llvm/Support/ErrorOr.h
@@ -16,13 +16,14 @@
 #ifndef LLVM_SUPPORT_ERROROR_H
 #define LLVM_SUPPORT_ERROROR_H
 
-#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Support/AlignOf.h"
 #include <cassert>
 #include <system_error>
 #include <type_traits>
+#include <utility>
 
 namespace llvm {
+
 /// \brief Stores a reference that can be changed.
 template <typename T>
 class ReferenceStorage {
@@ -67,17 +68,19 @@ public:
 template<class T>
 class ErrorOr {
   template <class OtherT> friend class ErrorOr;
+
   static const bool isRef = std::is_reference<T>::value;
-  typedef ReferenceStorage<typename std::remove_reference<T>::type> wrap;
+
+  using wrap = ReferenceStorage<typename std::remove_reference<T>::type>;
 
 public:
-  typedef typename std::conditional<isRef, wrap, T>::type storage_type;
+  using storage_type = typename std::conditional<isRef, wrap, T>::type;
 
 private:
-  typedef typename std::remove_reference<T>::type &reference;
-  typedef const typename std::remove_reference<T>::type &const_reference;
-  typedef typename std::remove_reference<T>::type *pointer;
-  typedef const typename std::remove_reference<T>::type *const_pointer;
+  using reference = typename std::remove_reference<T>::type &;
+  using const_reference = const typename std::remove_reference<T>::type &;
+  using pointer = typename std::remove_reference<T>::type *;
+  using const_pointer = const typename std::remove_reference<T>::type *;
 
 public:
   template <class E>
@@ -282,6 +285,7 @@ typename std::enable_if<std::is_error_code_enum<E>::value ||
 operator==(const ErrorOr<T> &Err, E Code) {
   return Err.getError() == Code;
 }
+
 } // end namespace llvm
 
 #endif // LLVM_SUPPORT_ERROROR_H
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index 7caefb5359b8..21c5fcdb7145 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -233,50 +233,6 @@ public:
   void permissions(perms p) { Perms = p; }
 };
 
-/// file_magic - An "enum class" enumeration of file types based on magic (the first
-///         N bytes of the file).
-struct file_magic {
-  enum Impl {
-    unknown = 0,              ///< Unrecognized file
-    bitcode,                  ///< Bitcode file
-    archive,                  ///< ar style archive file
-    elf,                      ///< ELF Unknown type
-    elf_relocatable,          ///< ELF Relocatable object file
-    elf_executable,           ///< ELF Executable image
-    elf_shared_object,        ///< ELF dynamically linked shared lib
-    elf_core,                 ///< ELF core image
-    macho_object,             ///< Mach-O Object file
-    macho_executable,         ///< Mach-O Executable
-    macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM
-    macho_core,               ///< Mach-O Core File
-    macho_preload_executable, ///< Mach-O Preloaded Executable
-    macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib
-    macho_dynamic_linker,     ///< The Mach-O dynamic linker
-    macho_bundle,             ///< Mach-O Bundle file
-    macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub
-    macho_dsym_companion,     ///< Mach-O dSYM companion file
-    macho_kext_bundle,        ///< Mach-O kext bundle file
-    macho_universal_binary,   ///< Mach-O universal binary
-    coff_cl_gl_object,        ///< Microsoft cl.exe's intermediate code file
-    coff_object,              ///< COFF object file
-    coff_import_library,      ///< COFF import library
-    pecoff_executable,        ///< PECOFF executable file
-    windows_resource,         ///< Windows compiled resource file (.res)
-    wasm_object               ///< WebAssembly Object file
-  };
-
-  bool is_object() const {
-    return V != unknown;
-  }
-
-  file_magic() = default;
-  file_magic(Impl V) : V(V) {}
-  operator Impl() const { return V; }
-
-private:
-  Impl V = unknown;
-};
-
 /// @}
 /// @name Physical Operators
 /// @{
@@ -770,17 +726,6 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
 std::error_code openFileForRead(const Twine &Name, int &ResultFD,
                                 SmallVectorImpl<char> *RealPath = nullptr);
 
-/// @brief Identify the type of a binary file based on how magical it is.
-file_magic identify_magic(StringRef magic);
-
-/// @brief Get and identify \a path's type based on its content.
-///
-/// @param path Input path.
-/// @param result Set to the type of file, or file_magic::unknown.
-/// @returns errc::success if result has been successfully set, otherwise a
-///          platform-specific error_code.
-std::error_code identify_magic(const Twine &path, file_magic &result);
-
 std::error_code getUniqueID(const Twine Path, UniqueID &Result);
 
 /// @brief Get disk space usage information.
diff --git a/include/llvm/Support/FormatVariadic.h b/include/llvm/Support/FormatVariadic.h
index 3a4668687cc9..c1153e84dfb5 100644
--- a/include/llvm/Support/FormatVariadic.h
+++ b/include/llvm/Support/FormatVariadic.h
@@ -27,8 +27,8 @@
 #define LLVM_SUPPORT_FORMATVARIADIC_H
 
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FormatCommon.h"
 #include "llvm/Support/FormatProviders.h"
diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h
index 73fddca8e35b..268c53c50252 100644
--- a/include/llvm/Support/GCOV.h
+++ b/include/llvm/Support/GCOV.h
@@ -16,12 +16,12 @@
 #define LLVM_SUPPORT_GCOV_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/iterator.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index 851ff7d80403..80a2dfcbad88 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -26,9 +26,9 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
diff --git a/include/llvm/Support/LowLevelTypeImpl.h b/include/llvm/Support/LowLevelTypeImpl.h
index e18e58b7b5b2..c79dd0c29507 100644
--- a/include/llvm/Support/LowLevelTypeImpl.h
+++ b/include/llvm/Support/LowLevelTypeImpl.h
@@ -27,9 +27,9 @@
 #ifndef LLVM_SUPPORT_LOWLEVELTYPEIMPL_H
 #define LLVM_SUPPORT_LOWLEVELTYPEIMPL_H
 
-#include <cassert>
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/CodeGen/MachineValueType.h"
+#include <cassert>
 
 namespace llvm {
 
diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h
deleted file mode 100644
index 3d704292c260..000000000000
--- a/include/llvm/Support/MachO.h
+++ /dev/null
@@ -1,2038 +0,0 @@
-//===-- llvm/Support/MachO.h - The MachO file format ------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines manifest constants for the MachO object file format.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_MACHO_H
-#define LLVM_SUPPORT_MACHO_H
-
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Host.h"
-
-namespace llvm {
-  namespace MachO {
-    // Enums from <mach-o/loader.h>
-    enum : uint32_t {
-      // Constants for the "magic" field in llvm::MachO::mach_header and
-      // llvm::MachO::mach_header_64
-      MH_MAGIC    = 0xFEEDFACEu,
-      MH_CIGAM    = 0xCEFAEDFEu,
-      MH_MAGIC_64 = 0xFEEDFACFu,
-      MH_CIGAM_64 = 0xCFFAEDFEu,
-      FAT_MAGIC   = 0xCAFEBABEu,
-      FAT_CIGAM   = 0xBEBAFECAu,
-      FAT_MAGIC_64 = 0xCAFEBABFu,
-      FAT_CIGAM_64 = 0xBFBAFECAu
-    };
-
-    enum HeaderFileType {
-      // Constants for the "filetype" field in llvm::MachO::mach_header and
-      // llvm::MachO::mach_header_64
-      MH_OBJECT      = 0x1u,
-      MH_EXECUTE     = 0x2u,
-      MH_FVMLIB      = 0x3u,
-      MH_CORE        = 0x4u,
-      MH_PRELOAD     = 0x5u,
-      MH_DYLIB       = 0x6u,
-      MH_DYLINKER    = 0x7u,
-      MH_BUNDLE      = 0x8u,
-      MH_DYLIB_STUB  = 0x9u,
-      MH_DSYM        = 0xAu,
-      MH_KEXT_BUNDLE = 0xBu
-    };
-
-    enum {
-      // Constant bits for the "flags" field in llvm::MachO::mach_header and
-      // llvm::MachO::mach_header_64
-      MH_NOUNDEFS                = 0x00000001u,
-      MH_INCRLINK                = 0x00000002u,
-      MH_DYLDLINK                = 0x00000004u,
-      MH_BINDATLOAD              = 0x00000008u,
-      MH_PREBOUND                = 0x00000010u,
-      MH_SPLIT_SEGS              = 0x00000020u,
-      MH_LAZY_INIT               = 0x00000040u,
-      MH_TWOLEVEL                = 0x00000080u,
-      MH_FORCE_FLAT              = 0x00000100u,
-      MH_NOMULTIDEFS             = 0x00000200u,
-      MH_NOFIXPREBINDING         = 0x00000400u,
-      MH_PREBINDABLE             = 0x00000800u,
-      MH_ALLMODSBOUND            = 0x00001000u,
-      MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000u,
-      MH_CANONICAL               = 0x00004000u,
-      MH_WEAK_DEFINES            = 0x00008000u,
-      MH_BINDS_TO_WEAK           = 0x00010000u,
-      MH_ALLOW_STACK_EXECUTION   = 0x00020000u,
-      MH_ROOT_SAFE               = 0x00040000u,
-      MH_SETUID_SAFE             = 0x00080000u,
-      MH_NO_REEXPORTED_DYLIBS    = 0x00100000u,
-      MH_PIE                     = 0x00200000u,
-      MH_DEAD_STRIPPABLE_DYLIB   = 0x00400000u,
-      MH_HAS_TLV_DESCRIPTORS     = 0x00800000u,
-      MH_NO_HEAP_EXECUTION       = 0x01000000u,
-      MH_APP_EXTENSION_SAFE      = 0x02000000u
-    };
-
-    enum : uint32_t {
-      // Flags for the "cmd" field in llvm::MachO::load_command
-      LC_REQ_DYLD    = 0x80000000u
-    };
-
-#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
-    LCName = LCValue,
-
-    enum LoadCommandType : uint32_t {
-      #include "llvm/Support/MachO.def"
-    };
-
-#undef HANDLE_LOAD_COMMAND
-
-    enum : uint32_t {
-      // Constant bits for the "flags" field in llvm::MachO::segment_command
-      SG_HIGHVM              = 0x1u,
-      SG_FVMLIB              = 0x2u,
-      SG_NORELOC             = 0x4u,
-      SG_PROTECTED_VERSION_1 = 0x8u,
-
-      // Constant masks for the "flags" field in llvm::MachO::section and
-      // llvm::MachO::section_64
-      SECTION_TYPE           = 0x000000ffu, // SECTION_TYPE
-      SECTION_ATTRIBUTES     = 0xffffff00u, // SECTION_ATTRIBUTES
-      SECTION_ATTRIBUTES_USR = 0xff000000u, // SECTION_ATTRIBUTES_USR
-      SECTION_ATTRIBUTES_SYS = 0x00ffff00u  // SECTION_ATTRIBUTES_SYS
-    };
-
-    /// These are the section type and attributes fields.  A MachO section can
-    /// have only one Type, but can have any of the attributes specified.
-    enum SectionType : uint32_t {
-      // Constant masks for the "flags[7:0]" field in llvm::MachO::section and
-      // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE)
-
-      /// S_REGULAR - Regular section.
-      S_REGULAR                             = 0x00u,
-      /// S_ZEROFILL - Zero fill on demand section.
-      S_ZEROFILL                            = 0x01u,
-      /// S_CSTRING_LITERALS - Section with literal C strings.
-      S_CSTRING_LITERALS                    = 0x02u,
-      /// S_4BYTE_LITERALS - Section with 4 byte literals.
-      S_4BYTE_LITERALS                      = 0x03u,
-      /// S_8BYTE_LITERALS - Section with 8 byte literals.
-      S_8BYTE_LITERALS                      = 0x04u,
-      /// S_LITERAL_POINTERS - Section with pointers to literals.
-      S_LITERAL_POINTERS                    = 0x05u,
-      /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers.
-      S_NON_LAZY_SYMBOL_POINTERS            = 0x06u,
-      /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers.
-      S_LAZY_SYMBOL_POINTERS                = 0x07u,
-      /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in
-      /// the Reserved2 field.
-      S_SYMBOL_STUBS                        = 0x08u,
-      /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for
-      /// initialization.
-      S_MOD_INIT_FUNC_POINTERS              = 0x09u,
-      /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for
-      /// termination.
-      S_MOD_TERM_FUNC_POINTERS              = 0x0au,
-      /// S_COALESCED - Section contains symbols that are to be coalesced.
-      S_COALESCED                           = 0x0bu,
-      /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4
-      /// gigabytes).
-      S_GB_ZEROFILL                         = 0x0cu,
-      /// S_INTERPOSING - Section with only pairs of function pointers for
-      /// interposing.
-      S_INTERPOSING                         = 0x0du,
-      /// S_16BYTE_LITERALS - Section with only 16 byte literals.
-      S_16BYTE_LITERALS                     = 0x0eu,
-      /// S_DTRACE_DOF - Section contains DTrace Object Format.
-      S_DTRACE_DOF                          = 0x0fu,
-      /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to
-      /// lazy loaded dylibs.
-      S_LAZY_DYLIB_SYMBOL_POINTERS          = 0x10u,
-      /// S_THREAD_LOCAL_REGULAR - Thread local data section.
-      S_THREAD_LOCAL_REGULAR                = 0x11u,
-      /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section.
-      S_THREAD_LOCAL_ZEROFILL               = 0x12u,
-      /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable
-      /// structure data.
-      S_THREAD_LOCAL_VARIABLES              = 0x13u,
-      /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with pointers to thread
-      /// local structures.
-      S_THREAD_LOCAL_VARIABLE_POINTERS      = 0x14u,
-      /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local
-      /// variable initialization pointers to functions.
-      S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u,
-
-      LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS
-    };
-
-    enum : uint32_t {
-      // Constant masks for the "flags[31:24]" field in llvm::MachO::section and
-      // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR)
-
-      /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine
-      /// instructions.
-      S_ATTR_PURE_INSTRUCTIONS   = 0x80000000u,
-      /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be
-      /// in a ranlib table of contents.
-      S_ATTR_NO_TOC              = 0x40000000u,
-      /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section
-      /// in files with the MY_DYLDLINK flag.
-      S_ATTR_STRIP_STATIC_SYMS   = 0x20000000u,
-      /// S_ATTR_NO_DEAD_STRIP - No dead stripping.
-      S_ATTR_NO_DEAD_STRIP       = 0x10000000u,
-      /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks.
-      S_ATTR_LIVE_SUPPORT        = 0x08000000u,
-      /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by
-      /// dyld.
-      S_ATTR_SELF_MODIFYING_CODE = 0x04000000u,
-      /// S_ATTR_DEBUG - A debug section.
-      S_ATTR_DEBUG               = 0x02000000u,
-
-      // Constant masks for the "flags[23:8]" field in llvm::MachO::section and
-      // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS)
-
-      /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions.
-      S_ATTR_SOME_INSTRUCTIONS   = 0x00000400u,
-      /// S_ATTR_EXT_RELOC - Section has external relocation entries.
-      S_ATTR_EXT_RELOC           = 0x00000200u,
-      /// S_ATTR_LOC_RELOC - Section has local relocation entries.
-      S_ATTR_LOC_RELOC           = 0x00000100u,
-
-      // Constant masks for the value of an indirect symbol in an indirect
-      // symbol table
-      INDIRECT_SYMBOL_LOCAL = 0x80000000u,
-      INDIRECT_SYMBOL_ABS   = 0x40000000u
-    };
-
-    enum DataRegionType {
-      // Constants for the "kind" field in a data_in_code_entry structure
-      DICE_KIND_DATA             = 1u,
-      DICE_KIND_JUMP_TABLE8      = 2u,
-      DICE_KIND_JUMP_TABLE16     = 3u,
-      DICE_KIND_JUMP_TABLE32     = 4u,
-      DICE_KIND_ABS_JUMP_TABLE32 = 5u
-    };
-
-    enum RebaseType {
-      REBASE_TYPE_POINTER         = 1u,
-      REBASE_TYPE_TEXT_ABSOLUTE32 = 2u,
-      REBASE_TYPE_TEXT_PCREL32    = 3u
-    };
-
-    enum {
-      REBASE_OPCODE_MASK    = 0xF0u,
-      REBASE_IMMEDIATE_MASK = 0x0Fu
-    };
-
-    enum RebaseOpcode {
-      REBASE_OPCODE_DONE                               = 0x00u,
-      REBASE_OPCODE_SET_TYPE_IMM                       = 0x10u,
-      REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB        = 0x20u,
-      REBASE_OPCODE_ADD_ADDR_ULEB                      = 0x30u,
-      REBASE_OPCODE_ADD_ADDR_IMM_SCALED                = 0x40u,
-      REBASE_OPCODE_DO_REBASE_IMM_TIMES                = 0x50u,
-      REBASE_OPCODE_DO_REBASE_ULEB_TIMES               = 0x60u,
-      REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB            = 0x70u,
-      REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80u
-    };
-
-    enum BindType {
-      BIND_TYPE_POINTER         = 1u,
-      BIND_TYPE_TEXT_ABSOLUTE32 = 2u,
-      BIND_TYPE_TEXT_PCREL32    = 3u
-    };
-
-    enum BindSpecialDylib {
-      BIND_SPECIAL_DYLIB_SELF            =  0,
-      BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1,
-      BIND_SPECIAL_DYLIB_FLAT_LOOKUP     = -2
-    };
-
-    enum {
-      BIND_SYMBOL_FLAGS_WEAK_IMPORT         = 0x1u,
-      BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8u,
-
-      BIND_OPCODE_MASK                      = 0xF0u,
-      BIND_IMMEDIATE_MASK                   = 0x0Fu
-    };
-
-    enum BindOpcode {
-      BIND_OPCODE_DONE                             = 0x00u,
-      BIND_OPCODE_SET_DYLIB_ORDINAL_IMM            = 0x10u,
-      BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB           = 0x20u,
-      BIND_OPCODE_SET_DYLIB_SPECIAL_IMM            = 0x30u,
-      BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM    = 0x40u,
-      BIND_OPCODE_SET_TYPE_IMM                     = 0x50u,
-      BIND_OPCODE_SET_ADDEND_SLEB                  = 0x60u,
-      BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB      = 0x70u,
-      BIND_OPCODE_ADD_ADDR_ULEB                    = 0x80u,
-      BIND_OPCODE_DO_BIND                          = 0x90u,
-      BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB            = 0xA0u,
-      BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED      = 0xB0u,
-      BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0u
-    };
-
-    enum {
-      EXPORT_SYMBOL_FLAGS_KIND_MASK           = 0x03u,
-      EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION     = 0x04u,
-      EXPORT_SYMBOL_FLAGS_REEXPORT            = 0x08u,
-      EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER   = 0x10u
-    };
-
-    enum ExportSymbolKind {
-      EXPORT_SYMBOL_FLAGS_KIND_REGULAR        = 0x00u,
-      EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL   = 0x01u,
-      EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE       = 0x02u
-    };
-
-    enum {
-      // Constant masks for the "n_type" field in llvm::MachO::nlist and
-      // llvm::MachO::nlist_64
-      N_STAB = 0xe0,
-      N_PEXT = 0x10,
-      N_TYPE = 0x0e,
-      N_EXT  = 0x01
-    };
-
-    enum NListType : uint8_t {
-      // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and
-      // llvm::MachO::nlist_64
-      N_UNDF = 0x0u,
-      N_ABS  = 0x2u,
-      N_SECT = 0xeu,
-      N_PBUD = 0xcu,
-      N_INDR = 0xau
-    };
-
-    enum SectionOrdinal {
-      // Constants for the "n_sect" field in llvm::MachO::nlist and
-      // llvm::MachO::nlist_64
-      NO_SECT  = 0u,
-      MAX_SECT = 0xffu
-    };
-
-    enum {
-      // Constant masks for the "n_desc" field in llvm::MachO::nlist and
-      // llvm::MachO::nlist_64
-      // The low 3 bits are the for the REFERENCE_TYPE.
-      REFERENCE_TYPE                            = 0x7,
-      REFERENCE_FLAG_UNDEFINED_NON_LAZY         = 0,
-      REFERENCE_FLAG_UNDEFINED_LAZY             = 1,
-      REFERENCE_FLAG_DEFINED                    = 2,
-      REFERENCE_FLAG_PRIVATE_DEFINED            = 3,
-      REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
-      REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY     = 5,
-      // Flag bits (some overlap with the library ordinal bits).
-      N_ARM_THUMB_DEF   = 0x0008u,
-      REFERENCED_DYNAMICALLY = 0x0010u,
-      N_NO_DEAD_STRIP   = 0x0020u,
-      N_WEAK_REF        = 0x0040u,
-      N_WEAK_DEF        = 0x0080u,
-      N_SYMBOL_RESOLVER = 0x0100u,
-      N_ALT_ENTRY       = 0x0200u,
-      // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL()
-      // as these are in the top 8 bits.
-      SELF_LIBRARY_ORDINAL   = 0x0,
-      MAX_LIBRARY_ORDINAL    = 0xfd,
-      DYNAMIC_LOOKUP_ORDINAL = 0xfe,
-      EXECUTABLE_ORDINAL     = 0xff
-    };
-
-    enum StabType {
-      // Constant values for the "n_type" field in llvm::MachO::nlist and
-      // llvm::MachO::nlist_64 when "(n_type & N_STAB) != 0"
-      N_GSYM    = 0x20u,
-      N_FNAME   = 0x22u,
-      N_FUN     = 0x24u,
-      N_STSYM   = 0x26u,
-      N_LCSYM   = 0x28u,
-      N_BNSYM   = 0x2Eu,
-      N_PC      = 0x30u,
-      N_AST     = 0x32u,
-      N_OPT     = 0x3Cu,
-      N_RSYM    = 0x40u,
-      N_SLINE   = 0x44u,
-      N_ENSYM   = 0x4Eu,
-      N_SSYM    = 0x60u,
-      N_SO      = 0x64u,
-      N_OSO     = 0x66u,
-      N_LSYM    = 0x80u,
-      N_BINCL   = 0x82u,
-      N_SOL     = 0x84u,
-      N_PARAMS  = 0x86u,
-      N_VERSION = 0x88u,
-      N_OLEVEL  = 0x8Au,
-      N_PSYM    = 0xA0u,
-      N_EINCL   = 0xA2u,
-      N_ENTRY   = 0xA4u,
-      N_LBRAC   = 0xC0u,
-      N_EXCL    = 0xC2u,
-      N_RBRAC   = 0xE0u,
-      N_BCOMM   = 0xE2u,
-      N_ECOMM   = 0xE4u,
-      N_ECOML   = 0xE8u,
-      N_LENG    = 0xFEu
-    };
-
-    enum : uint32_t {
-      // Constant values for the r_symbolnum field in an
-      // llvm::MachO::relocation_info structure when r_extern is 0.
-      R_ABS = 0,
-
-      // Constant bits for the r_address field in an
-      // llvm::MachO::relocation_info structure.
-      R_SCATTERED = 0x80000000
-    };
-
-    enum RelocationInfoType {
-      // Constant values for the r_type field in an
-      // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
-      // structure.
-      GENERIC_RELOC_VANILLA        = 0,
-      GENERIC_RELOC_PAIR           = 1,
-      GENERIC_RELOC_SECTDIFF       = 2,
-      GENERIC_RELOC_PB_LA_PTR      = 3,
-      GENERIC_RELOC_LOCAL_SECTDIFF = 4,
-      GENERIC_RELOC_TLV            = 5,
-
-      // Constant values for the r_type field in a PowerPC architecture
-      // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
-      // structure.
-      PPC_RELOC_VANILLA            = GENERIC_RELOC_VANILLA,
-      PPC_RELOC_PAIR               = GENERIC_RELOC_PAIR,
-      PPC_RELOC_BR14               = 2,
-      PPC_RELOC_BR24               = 3,
-      PPC_RELOC_HI16               = 4,
-      PPC_RELOC_LO16               = 5,
-      PPC_RELOC_HA16               = 6,
-      PPC_RELOC_LO14               = 7,
-      PPC_RELOC_SECTDIFF           = 8,
-      PPC_RELOC_PB_LA_PTR          = 9,
-      PPC_RELOC_HI16_SECTDIFF      = 10,
-      PPC_RELOC_LO16_SECTDIFF      = 11,
-      PPC_RELOC_HA16_SECTDIFF      = 12,
-      PPC_RELOC_JBSR               = 13,
-      PPC_RELOC_LO14_SECTDIFF      = 14,
-      PPC_RELOC_LOCAL_SECTDIFF     = 15,
-
-      // Constant values for the r_type field in an ARM architecture
-      // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
-      // structure.
-      ARM_RELOC_VANILLA            = GENERIC_RELOC_VANILLA,
-      ARM_RELOC_PAIR               = GENERIC_RELOC_PAIR,
-      ARM_RELOC_SECTDIFF           = GENERIC_RELOC_SECTDIFF,
-      ARM_RELOC_LOCAL_SECTDIFF     = 3,
-      ARM_RELOC_PB_LA_PTR          = 4,
-      ARM_RELOC_BR24               = 5,
-      ARM_THUMB_RELOC_BR22         = 6,
-      ARM_THUMB_32BIT_BRANCH       = 7, // obsolete
-      ARM_RELOC_HALF               = 8,
-      ARM_RELOC_HALF_SECTDIFF      = 9,
-
-      // Constant values for the r_type field in an ARM64 architecture
-      // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
-      // structure.
-
-      // For pointers.
-      ARM64_RELOC_UNSIGNED            = 0,
-      // Must be followed by an ARM64_RELOC_UNSIGNED
-      ARM64_RELOC_SUBTRACTOR          = 1,
-      // A B/BL instruction with 26-bit displacement.
-      ARM64_RELOC_BRANCH26            = 2,
-      // PC-rel distance to page of target.
-      ARM64_RELOC_PAGE21              = 3,
-      // Offset within page, scaled by r_length.
-      ARM64_RELOC_PAGEOFF12           = 4,
-      // PC-rel distance to page of GOT slot.
-      ARM64_RELOC_GOT_LOAD_PAGE21     = 5,
-      // Offset within page of GOT slot, scaled by r_length.
-      ARM64_RELOC_GOT_LOAD_PAGEOFF12  = 6,
-      // For pointers to GOT slots.
-      ARM64_RELOC_POINTER_TO_GOT      = 7,
-      // PC-rel distance to page of TLVP slot.
-      ARM64_RELOC_TLVP_LOAD_PAGE21    = 8,
-      // Offset within page of TLVP slot, scaled by r_length.
-      ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9,
-      // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12.
-      ARM64_RELOC_ADDEND              = 10,
-
-      // Constant values for the r_type field in an x86_64 architecture
-      // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
-      // structure
-      X86_64_RELOC_UNSIGNED        = 0,
-      X86_64_RELOC_SIGNED          = 1,
-      X86_64_RELOC_BRANCH          = 2,
-      X86_64_RELOC_GOT_LOAD        = 3,
-      X86_64_RELOC_GOT             = 4,
-      X86_64_RELOC_SUBTRACTOR      = 5,
-      X86_64_RELOC_SIGNED_1        = 6,
-      X86_64_RELOC_SIGNED_2        = 7,
-      X86_64_RELOC_SIGNED_4        = 8,
-      X86_64_RELOC_TLV             = 9
-    };
-
-    // Values for segment_command.initprot.
-    // From <mach/vm_prot.h>
-    enum {
-      VM_PROT_READ    = 0x1,
-      VM_PROT_WRITE   = 0x2,
-      VM_PROT_EXECUTE = 0x4
-    };
-
-    // Values for platform field in build_version_command.
-    enum {
-      PLATFORM_MACOS    = 1,
-      PLATFORM_IOS      = 2,
-      PLATFORM_TVOS     = 3,
-      PLATFORM_WATCHOS  = 4,
-      PLATFORM_BRIDGEOS = 5
-    };
-
-    // Values for tools enum in build_tool_version.
-    enum {
-      TOOL_CLANG  = 1,
-      TOOL_SWIFT  = 2,
-      TOOL_LD     = 3
-    };
-
-    // Structs from <mach-o/loader.h>
-
-    struct mach_header {
-      uint32_t magic;
-      uint32_t cputype;
-      uint32_t cpusubtype;
-      uint32_t filetype;
-      uint32_t ncmds;
-      uint32_t sizeofcmds;
-      uint32_t flags;
-    };
-
-    struct mach_header_64 {
-      uint32_t magic;
-      uint32_t cputype;
-      uint32_t cpusubtype;
-      uint32_t filetype;
-      uint32_t ncmds;
-      uint32_t sizeofcmds;
-      uint32_t flags;
-      uint32_t reserved;
-    };
-
-    struct load_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-    };
-
-    struct segment_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      char segname[16];
-      uint32_t vmaddr;
-      uint32_t vmsize;
-      uint32_t fileoff;
-      uint32_t filesize;
-      uint32_t maxprot;
-      uint32_t initprot;
-      uint32_t nsects;
-      uint32_t flags;
-    };
-
-    struct segment_command_64 {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      char segname[16];
-      uint64_t vmaddr;
-      uint64_t vmsize;
-      uint64_t fileoff;
-      uint64_t filesize;
-      uint32_t maxprot;
-      uint32_t initprot;
-      uint32_t nsects;
-      uint32_t flags;
-    };
-
-    struct section {
-      char sectname[16];
-      char segname[16];
-      uint32_t addr;
-      uint32_t size;
-      uint32_t offset;
-      uint32_t align;
-      uint32_t reloff;
-      uint32_t nreloc;
-      uint32_t flags;
-      uint32_t reserved1;
-      uint32_t reserved2;
-    };
-
-    struct section_64 {
-      char sectname[16];
-      char segname[16];
-      uint64_t addr;
-      uint64_t size;
-      uint32_t offset;
-      uint32_t align;
-      uint32_t reloff;
-      uint32_t nreloc;
-      uint32_t flags;
-      uint32_t reserved1;
-      uint32_t reserved2;
-      uint32_t reserved3;
-    };
-
-    struct fvmlib {
-      uint32_t name;
-      uint32_t minor_version;
-      uint32_t header_addr;
-    };
-
-    // The fvmlib_command is obsolete and no longer supported.
-    struct fvmlib_command {
-      uint32_t  cmd;
-      uint32_t cmdsize;
-      struct fvmlib fvmlib;
-    };
-
-    struct dylib {
-      uint32_t name;
-      uint32_t timestamp;
-      uint32_t current_version;
-      uint32_t compatibility_version;
-    };
-
-    struct dylib_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      struct dylib dylib;
-    };
-
-    struct sub_framework_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t umbrella;
-    };
-
-    struct sub_client_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t client;
-    };
-
-    struct sub_umbrella_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t sub_umbrella;
-    };
-
-    struct sub_library_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t sub_library;
-    };
-
-    // The prebound_dylib_command is obsolete and no longer supported.
-    struct prebound_dylib_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t name;
-      uint32_t nmodules;
-      uint32_t linked_modules;
-    };
-
-    struct dylinker_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t name;
-    };
-
-    struct thread_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-    };
-
-    struct routines_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t init_address;
-      uint32_t init_module;
-      uint32_t reserved1;
-      uint32_t reserved2;
-      uint32_t reserved3;
-      uint32_t reserved4;
-      uint32_t reserved5;
-      uint32_t reserved6;
-    };
-
-    struct routines_command_64 {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint64_t init_address;
-      uint64_t init_module;
-      uint64_t reserved1;
-      uint64_t reserved2;
-      uint64_t reserved3;
-      uint64_t reserved4;
-      uint64_t reserved5;
-      uint64_t reserved6;
-    };
-
-    struct symtab_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t symoff;
-      uint32_t nsyms;
-      uint32_t stroff;
-      uint32_t strsize;
-    };
-
-    struct dysymtab_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t ilocalsym;
-      uint32_t nlocalsym;
-      uint32_t iextdefsym;
-      uint32_t nextdefsym;
-      uint32_t iundefsym;
-      uint32_t nundefsym;
-      uint32_t tocoff;
-      uint32_t ntoc;
-      uint32_t modtaboff;
-      uint32_t nmodtab;
-      uint32_t extrefsymoff;
-      uint32_t nextrefsyms;
-      uint32_t indirectsymoff;
-      uint32_t nindirectsyms;
-      uint32_t extreloff;
-      uint32_t nextrel;
-      uint32_t locreloff;
-      uint32_t nlocrel;
-    };
-
-    struct dylib_table_of_contents {
-      uint32_t symbol_index;
-      uint32_t module_index;
-    };
-
-    struct dylib_module {
-      uint32_t module_name;
-      uint32_t iextdefsym;
-      uint32_t nextdefsym;
-      uint32_t irefsym;
-      uint32_t nrefsym;
-      uint32_t ilocalsym;
-      uint32_t nlocalsym;
-      uint32_t iextrel;
-      uint32_t nextrel;
-      uint32_t iinit_iterm;
-      uint32_t ninit_nterm;
-      uint32_t objc_module_info_addr;
-      uint32_t objc_module_info_size;
-    };
-
-    struct dylib_module_64 {
-      uint32_t module_name;
-      uint32_t iextdefsym;
-      uint32_t nextdefsym;
-      uint32_t irefsym;
-      uint32_t nrefsym;
-      uint32_t ilocalsym;
-      uint32_t nlocalsym;
-      uint32_t iextrel;
-      uint32_t nextrel;
-      uint32_t iinit_iterm;
-      uint32_t ninit_nterm;
-      uint32_t objc_module_info_size;
-      uint64_t objc_module_info_addr;
-    };
-
-    struct dylib_reference {
-      uint32_t isym:24,
-               flags:8;
-    };
-
-    // The twolevel_hints_command is obsolete and no longer supported.
-    struct twolevel_hints_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t offset;
-      uint32_t nhints;
-    };
-
-    // The twolevel_hints_command is obsolete and no longer supported.
-    struct twolevel_hint {
-      uint32_t isub_image:8,
-               itoc:24;
-    };
-
-    // The prebind_cksum_command is obsolete and no longer supported.
-    struct prebind_cksum_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t cksum;
-    };
-
-    struct uuid_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint8_t uuid[16];
-    };
-
-    struct rpath_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t path;
-    };
-
-    struct linkedit_data_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t dataoff;
-      uint32_t datasize;
-    };
-
-    struct data_in_code_entry {
-      uint32_t offset;
-      uint16_t length;
-      uint16_t kind;
-    };
-
-    struct source_version_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint64_t version;
-    };
-
-    struct encryption_info_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t cryptoff;
-      uint32_t cryptsize;
-      uint32_t cryptid;
-    };
-
-    struct encryption_info_command_64 {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t cryptoff;
-      uint32_t cryptsize;
-      uint32_t cryptid;
-      uint32_t pad;
-    };
-
-    struct version_min_command {
-      uint32_t cmd;       // LC_VERSION_MIN_MACOSX or
-                          // LC_VERSION_MIN_IPHONEOS
-      uint32_t cmdsize;   // sizeof(struct version_min_command)
-      uint32_t version;   // X.Y.Z is encoded in nibbles xxxx.yy.zz
-      uint32_t sdk;       // X.Y.Z is encoded in nibbles xxxx.yy.zz
-    };
-
-    struct note_command {
-      uint32_t cmd;        // LC_NOTE
-      uint32_t cmdsize;    // sizeof(struct note_command)
-      char data_owner[16]; // owner name for this LC_NOTE
-      uint64_t offset;     // file offset of this data
-      uint64_t size;       // length of data region
-    };
-
-    struct build_tool_version {
-      uint32_t tool;      // enum for the tool
-      uint32_t version;   // version of the tool
-    };
-
-    struct build_version_command {
-      uint32_t cmd;       // LC_BUILD_VERSION
-      uint32_t cmdsize;   // sizeof(struct build_version_command) +
-                          // ntools * sizeof(struct build_tool_version)
-      uint32_t platform;  // platform
-      uint32_t minos;     // X.Y.Z is encoded in nibbles xxxx.yy.zz
-      uint32_t sdk;       // X.Y.Z is encoded in nibbles xxxx.yy.zz
-      uint32_t ntools;    // number of tool entries following this
-    };
-
-    struct dyld_info_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t rebase_off;
-      uint32_t rebase_size;
-      uint32_t bind_off;
-      uint32_t bind_size;
-      uint32_t weak_bind_off;
-      uint32_t weak_bind_size;
-      uint32_t lazy_bind_off;
-      uint32_t lazy_bind_size;
-      uint32_t export_off;
-      uint32_t export_size;
-    };
-
-    struct linker_option_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t count;
-    };
-
-    // The symseg_command is obsolete and no longer supported.
-    struct symseg_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t offset;
-      uint32_t size;
-    };
-
-    // The ident_command is obsolete and no longer supported.
-    struct ident_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-    };
-
-    // The fvmfile_command is obsolete and no longer supported.
-    struct fvmfile_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint32_t name;
-      uint32_t header_addr;
-    };
-
-    struct tlv_descriptor_32 {
-      uint32_t thunk;
-      uint32_t key;
-      uint32_t offset;
-    };
-
-    struct tlv_descriptor_64 {
-      uint64_t thunk;
-      uint64_t key;
-      uint64_t offset;
-    };
-
-    struct tlv_descriptor {
-      uintptr_t thunk;
-      uintptr_t key;
-      uintptr_t offset;
-    };
-
-    struct entry_point_command {
-      uint32_t cmd;
-      uint32_t cmdsize;
-      uint64_t entryoff;
-      uint64_t stacksize;
-    };
-
-    // Structs from <mach-o/fat.h>
-    struct fat_header {
-      uint32_t magic;
-      uint32_t nfat_arch;
-    };
-
-    struct fat_arch {
-      uint32_t cputype;
-      uint32_t cpusubtype;
-      uint32_t offset;
-      uint32_t size;
-      uint32_t align;
-    };
-
-    struct fat_arch_64 {
-      uint32_t cputype;
-      uint32_t cpusubtype;
-      uint64_t offset;
-      uint64_t size;
-      uint32_t align;
-      uint32_t reserved;
-    };
-
-    // Structs from <mach-o/reloc.h>
-    struct relocation_info {
-      int32_t r_address;
-      uint32_t r_symbolnum:24,
-               r_pcrel:1,
-               r_length:2,
-               r_extern:1,
-               r_type:4;
-    };
-
-    struct scattered_relocation_info {
-#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
-      uint32_t r_scattered:1,
-               r_pcrel:1,
-               r_length:2,
-               r_type:4,
-               r_address:24;
-#else
-      uint32_t r_address:24,
-               r_type:4,
-               r_length:2,
-               r_pcrel:1,
-               r_scattered:1;
-#endif
-      int32_t r_value;
-    };
-
-    // Structs NOT from <mach-o/reloc.h>, but that make LLVM's life easier
-    struct any_relocation_info {
-      uint32_t r_word0, r_word1;
-    };
-
-    // Structs from <mach-o/nlist.h>
-    struct nlist_base {
-      uint32_t n_strx;
-      uint8_t n_type;
-      uint8_t n_sect;
-      uint16_t n_desc;
-    };
-
-    struct nlist {
-      uint32_t n_strx;
-      uint8_t n_type;
-      uint8_t n_sect;
-      int16_t n_desc;
-      uint32_t n_value;
-    };
-
-    struct nlist_64 {
-      uint32_t n_strx;
-      uint8_t n_type;
-      uint8_t n_sect;
-      uint16_t n_desc;
-      uint64_t n_value;
-    };
-
-    // Byte order swapping functions for MachO structs
-
-    inline void swapStruct(fat_header &mh) {
-      sys::swapByteOrder(mh.magic);
-      sys::swapByteOrder(mh.nfat_arch);
-    }
-
-    inline void swapStruct(fat_arch &mh) {
-      sys::swapByteOrder(mh.cputype);
-      sys::swapByteOrder(mh.cpusubtype);
-      sys::swapByteOrder(mh.offset);
-      sys::swapByteOrder(mh.size);
-      sys::swapByteOrder(mh.align);
-    }
-
-    inline void swapStruct(fat_arch_64 &mh) {
-      sys::swapByteOrder(mh.cputype);
-      sys::swapByteOrder(mh.cpusubtype);
-      sys::swapByteOrder(mh.offset);
-      sys::swapByteOrder(mh.size);
-      sys::swapByteOrder(mh.align);
-      sys::swapByteOrder(mh.reserved);
-    }
-
-    inline void swapStruct(mach_header &mh) {
-      sys::swapByteOrder(mh.magic);
-      sys::swapByteOrder(mh.cputype);
-      sys::swapByteOrder(mh.cpusubtype);
-      sys::swapByteOrder(mh.filetype);
-      sys::swapByteOrder(mh.ncmds);
-      sys::swapByteOrder(mh.sizeofcmds);
-      sys::swapByteOrder(mh.flags);
-    }
-
-    inline void swapStruct(mach_header_64 &H) {
-      sys::swapByteOrder(H.magic);
-      sys::swapByteOrder(H.cputype);
-      sys::swapByteOrder(H.cpusubtype);
-      sys::swapByteOrder(H.filetype);
-      sys::swapByteOrder(H.ncmds);
-      sys::swapByteOrder(H.sizeofcmds);
-      sys::swapByteOrder(H.flags);
-      sys::swapByteOrder(H.reserved);
-    }
-
-    inline void swapStruct(load_command &lc) {
-      sys::swapByteOrder(lc.cmd);
-      sys::swapByteOrder(lc.cmdsize);
-    }
-
-    inline void swapStruct(symtab_command &lc) {
-      sys::swapByteOrder(lc.cmd);
-      sys::swapByteOrder(lc.cmdsize);
-      sys::swapByteOrder(lc.symoff);
-      sys::swapByteOrder(lc.nsyms);
-      sys::swapByteOrder(lc.stroff);
-      sys::swapByteOrder(lc.strsize);
-    }
-
-    inline void swapStruct(segment_command_64 &seg) {
-      sys::swapByteOrder(seg.cmd);
-      sys::swapByteOrder(seg.cmdsize);
-      sys::swapByteOrder(seg.vmaddr);
-      sys::swapByteOrder(seg.vmsize);
-      sys::swapByteOrder(seg.fileoff);
-      sys::swapByteOrder(seg.filesize);
-      sys::swapByteOrder(seg.maxprot);
-      sys::swapByteOrder(seg.initprot);
-      sys::swapByteOrder(seg.nsects);
-      sys::swapByteOrder(seg.flags);
-    }
-
-    inline void swapStruct(segment_command &seg) {
-      sys::swapByteOrder(seg.cmd);
-      sys::swapByteOrder(seg.cmdsize);
-      sys::swapByteOrder(seg.vmaddr);
-      sys::swapByteOrder(seg.vmsize);
-      sys::swapByteOrder(seg.fileoff);
-      sys::swapByteOrder(seg.filesize);
-      sys::swapByteOrder(seg.maxprot);
-      sys::swapByteOrder(seg.initprot);
-      sys::swapByteOrder(seg.nsects);
-      sys::swapByteOrder(seg.flags);
-    }
-
-    inline void swapStruct(section_64 &sect) {
-      sys::swapByteOrder(sect.addr);
-      sys::swapByteOrder(sect.size);
-      sys::swapByteOrder(sect.offset);
-      sys::swapByteOrder(sect.align);
-      sys::swapByteOrder(sect.reloff);
-      sys::swapByteOrder(sect.nreloc);
-      sys::swapByteOrder(sect.flags);
-      sys::swapByteOrder(sect.reserved1);
-      sys::swapByteOrder(sect.reserved2);
-    }
-
-    inline void swapStruct(section &sect) {
-      sys::swapByteOrder(sect.addr);
-      sys::swapByteOrder(sect.size);
-      sys::swapByteOrder(sect.offset);
-      sys::swapByteOrder(sect.align);
-      sys::swapByteOrder(sect.reloff);
-      sys::swapByteOrder(sect.nreloc);
-      sys::swapByteOrder(sect.flags);
-      sys::swapByteOrder(sect.reserved1);
-      sys::swapByteOrder(sect.reserved2);
-    }
-
-    inline void swapStruct(dyld_info_command &info) {
-      sys::swapByteOrder(info.cmd);
-      sys::swapByteOrder(info.cmdsize);
-      sys::swapByteOrder(info.rebase_off);
-      sys::swapByteOrder(info.rebase_size);
-      sys::swapByteOrder(info.bind_off);
-      sys::swapByteOrder(info.bind_size);
-      sys::swapByteOrder(info.weak_bind_off);
-      sys::swapByteOrder(info.weak_bind_size);
-      sys::swapByteOrder(info.lazy_bind_off);
-      sys::swapByteOrder(info.lazy_bind_size);
-      sys::swapByteOrder(info.export_off);
-      sys::swapByteOrder(info.export_size);
-    }
-
-    inline void swapStruct(dylib_command &d) {
-      sys::swapByteOrder(d.cmd);
-      sys::swapByteOrder(d.cmdsize);
-      sys::swapByteOrder(d.dylib.name);
-      sys::swapByteOrder(d.dylib.timestamp);
-      sys::swapByteOrder(d.dylib.current_version);
-      sys::swapByteOrder(d.dylib.compatibility_version);
-    }
-
-    inline void swapStruct(sub_framework_command &s) {
-      sys::swapByteOrder(s.cmd);
-      sys::swapByteOrder(s.cmdsize);
-      sys::swapByteOrder(s.umbrella);
-    }
-
-    inline void swapStruct(sub_umbrella_command &s) {
-      sys::swapByteOrder(s.cmd);
-      sys::swapByteOrder(s.cmdsize);
-      sys::swapByteOrder(s.sub_umbrella);
-    }
-
-    inline void swapStruct(sub_library_command &s) {
-      sys::swapByteOrder(s.cmd);
-      sys::swapByteOrder(s.cmdsize);
-      sys::swapByteOrder(s.sub_library);
-    }
-
-    inline void swapStruct(sub_client_command &s) {
-      sys::swapByteOrder(s.cmd);
-      sys::swapByteOrder(s.cmdsize);
-      sys::swapByteOrder(s.client);
-    }
-
-    inline void swapStruct(routines_command &r) {
-      sys::swapByteOrder(r.cmd);
-      sys::swapByteOrder(r.cmdsize);
-      sys::swapByteOrder(r.init_address);
-      sys::swapByteOrder(r.init_module);
-      sys::swapByteOrder(r.reserved1);
-      sys::swapByteOrder(r.reserved2);
-      sys::swapByteOrder(r.reserved3);
-      sys::swapByteOrder(r.reserved4);
-      sys::swapByteOrder(r.reserved5);
-      sys::swapByteOrder(r.reserved6);
-    }
-
-    inline void swapStruct(routines_command_64 &r) {
-      sys::swapByteOrder(r.cmd);
-      sys::swapByteOrder(r.cmdsize);
-      sys::swapByteOrder(r.init_address);
-      sys::swapByteOrder(r.init_module);
-      sys::swapByteOrder(r.reserved1);
-      sys::swapByteOrder(r.reserved2);
-      sys::swapByteOrder(r.reserved3);
-      sys::swapByteOrder(r.reserved4);
-      sys::swapByteOrder(r.reserved5);
-      sys::swapByteOrder(r.reserved6);
-    }
-
-    inline void swapStruct(thread_command &t) {
-      sys::swapByteOrder(t.cmd);
-      sys::swapByteOrder(t.cmdsize);
-    }
-
-    inline void swapStruct(dylinker_command &d) {
-      sys::swapByteOrder(d.cmd);
-      sys::swapByteOrder(d.cmdsize);
-      sys::swapByteOrder(d.name);
-    }
-
-    inline void swapStruct(uuid_command &u) {
-      sys::swapByteOrder(u.cmd);
-      sys::swapByteOrder(u.cmdsize);
-    }
-
-    inline void swapStruct(rpath_command &r) {
-      sys::swapByteOrder(r.cmd);
-      sys::swapByteOrder(r.cmdsize);
-      sys::swapByteOrder(r.path);
-    }
-
-    inline void swapStruct(source_version_command &s) {
-      sys::swapByteOrder(s.cmd);
-      sys::swapByteOrder(s.cmdsize);
-      sys::swapByteOrder(s.version);
-    }
-
-    inline void swapStruct(entry_point_command &e) {
-      sys::swapByteOrder(e.cmd);
-      sys::swapByteOrder(e.cmdsize);
-      sys::swapByteOrder(e.entryoff);
-      sys::swapByteOrder(e.stacksize);
-    }
-
-    inline void swapStruct(encryption_info_command &e) {
-      sys::swapByteOrder(e.cmd);
-      sys::swapByteOrder(e.cmdsize);
-      sys::swapByteOrder(e.cryptoff);
-      sys::swapByteOrder(e.cryptsize);
-      sys::swapByteOrder(e.cryptid);
-    }
-
-    inline void swapStruct(encryption_info_command_64 &e) {
-      sys::swapByteOrder(e.cmd);
-      sys::swapByteOrder(e.cmdsize);
-      sys::swapByteOrder(e.cryptoff);
-      sys::swapByteOrder(e.cryptsize);
-      sys::swapByteOrder(e.cryptid);
-      sys::swapByteOrder(e.pad);
-    }
-
-    inline void swapStruct(dysymtab_command &dst) {
-      sys::swapByteOrder(dst.cmd);
-      sys::swapByteOrder(dst.cmdsize);
-      sys::swapByteOrder(dst.ilocalsym);
-      sys::swapByteOrder(dst.nlocalsym);
-      sys::swapByteOrder(dst.iextdefsym);
-      sys::swapByteOrder(dst.nextdefsym);
-      sys::swapByteOrder(dst.iundefsym);
-      sys::swapByteOrder(dst.nundefsym);
-      sys::swapByteOrder(dst.tocoff);
-      sys::swapByteOrder(dst.ntoc);
-      sys::swapByteOrder(dst.modtaboff);
-      sys::swapByteOrder(dst.nmodtab);
-      sys::swapByteOrder(dst.extrefsymoff);
-      sys::swapByteOrder(dst.nextrefsyms);
-      sys::swapByteOrder(dst.indirectsymoff);
-      sys::swapByteOrder(dst.nindirectsyms);
-      sys::swapByteOrder(dst.extreloff);
-      sys::swapByteOrder(dst.nextrel);
-      sys::swapByteOrder(dst.locreloff);
-      sys::swapByteOrder(dst.nlocrel);
-    }
-
-    inline void swapStruct(any_relocation_info &reloc) {
-      sys::swapByteOrder(reloc.r_word0);
-      sys::swapByteOrder(reloc.r_word1);
-    }
-
-    inline void swapStruct(nlist_base &S) {
-      sys::swapByteOrder(S.n_strx);
-      sys::swapByteOrder(S.n_desc);
-    }
-
-    inline void swapStruct(nlist &sym) {
-      sys::swapByteOrder(sym.n_strx);
-      sys::swapByteOrder(sym.n_desc);
-      sys::swapByteOrder(sym.n_value);
-    }
-
-    inline void swapStruct(nlist_64 &sym) {
-      sys::swapByteOrder(sym.n_strx);
-      sys::swapByteOrder(sym.n_desc);
-      sys::swapByteOrder(sym.n_value);
-    }
-
-    inline void swapStruct(linkedit_data_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.dataoff);
-      sys::swapByteOrder(C.datasize);
-    }
-
-    inline void swapStruct(linker_option_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.count);
-    }
-
-    inline void swapStruct(version_min_command&C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.version);
-      sys::swapByteOrder(C.sdk);
-    }
-
-    inline void swapStruct(note_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.offset);
-      sys::swapByteOrder(C.size);
-    }
-
-    inline void swapStruct(build_version_command&C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.platform);
-      sys::swapByteOrder(C.minos);
-      sys::swapByteOrder(C.sdk);
-      sys::swapByteOrder(C.ntools);
-    }
-
-    inline void swapStruct(build_tool_version&C) {
-      sys::swapByteOrder(C.tool);
-      sys::swapByteOrder(C.version);
-    }
-
-    inline void swapStruct(data_in_code_entry &C) {
-      sys::swapByteOrder(C.offset);
-      sys::swapByteOrder(C.length);
-      sys::swapByteOrder(C.kind);
-    }
-
-    inline void swapStruct(uint32_t &C) {
-      sys::swapByteOrder(C);
-    }
-
-    // The prebind_cksum_command is obsolete and no longer supported.
-    inline void swapStruct(prebind_cksum_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.cksum);
-    }
-
-    // The twolevel_hints_command is obsolete and no longer supported.
-    inline void swapStruct(twolevel_hints_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.offset);
-      sys::swapByteOrder(C.nhints);
-    }
-
-    // The prebound_dylib_command is obsolete and no longer supported.
-    inline void swapStruct(prebound_dylib_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.name);
-      sys::swapByteOrder(C.nmodules);
-      sys::swapByteOrder(C.linked_modules);
-    }
-
-    // The fvmfile_command is obsolete and no longer supported.
-    inline void swapStruct(fvmfile_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.name);
-      sys::swapByteOrder(C.header_addr);
-    }
-
-    // The symseg_command is obsolete and no longer supported.
-    inline void swapStruct(symseg_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      sys::swapByteOrder(C.offset);
-      sys::swapByteOrder(C.size);
-    }
-
-    // The ident_command is obsolete and no longer supported.
-    inline void swapStruct(ident_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-    }
-
-    inline void swapStruct(fvmlib &C) {
-      sys::swapByteOrder(C.name);
-      sys::swapByteOrder(C.minor_version);
-      sys::swapByteOrder(C.header_addr);
-    }
-
-    // The fvmlib_command is obsolete and no longer supported.
-    inline void swapStruct(fvmlib_command &C) {
-      sys::swapByteOrder(C.cmd);
-      sys::swapByteOrder(C.cmdsize);
-      swapStruct(C.fvmlib);
-    }
-
-    // Get/Set functions from <mach-o/nlist.h>
-
-    static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) {
-      return (((n_desc) >> 8u) & 0xffu);
-    }
-
-    static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) {
-      n_desc = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8));
-    }
-
-    static inline uint8_t GET_COMM_ALIGN (uint16_t n_desc) {
-      return (n_desc >> 8u) & 0x0fu;
-    }
-
-    static inline void SET_COMM_ALIGN (uint16_t &n_desc, uint8_t align) {
-      n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u));
-    }
-
-    // Enums from <mach/machine.h>
-    enum : uint32_t {
-      // Capability bits used in the definition of cpu_type.
-      CPU_ARCH_MASK  = 0xff000000,   // Mask for architecture bits
-      CPU_ARCH_ABI64 = 0x01000000    // 64 bit ABI
-    };
-
-    // Constants for the cputype field.
-    enum CPUType {
-      CPU_TYPE_ANY       = -1,
-      CPU_TYPE_X86       = 7,
-      CPU_TYPE_I386      = CPU_TYPE_X86,
-      CPU_TYPE_X86_64    = CPU_TYPE_X86 | CPU_ARCH_ABI64,
-   /* CPU_TYPE_MIPS      = 8, */
-      CPU_TYPE_MC98000   = 10, // Old Motorola PowerPC
-      CPU_TYPE_ARM       = 12,
-      CPU_TYPE_ARM64     = CPU_TYPE_ARM | CPU_ARCH_ABI64,
-      CPU_TYPE_SPARC     = 14,
-      CPU_TYPE_POWERPC   = 18,
-      CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
-    };
-
-    enum : uint32_t {
-      // Capability bits used in the definition of cpusubtype.
-      CPU_SUBTYPE_MASK  = 0xff000000,   // Mask for architecture bits
-      CPU_SUBTYPE_LIB64 = 0x80000000,   // 64 bit libraries
-
-      // Special CPU subtype constants.
-      CPU_SUBTYPE_MULTIPLE = ~0u
-    };
-
-    // Constants for the cpusubtype field.
-    enum CPUSubTypeX86 {
-      CPU_SUBTYPE_I386_ALL       = 3,
-      CPU_SUBTYPE_386            = 3,
-      CPU_SUBTYPE_486            = 4,
-      CPU_SUBTYPE_486SX          = 0x84,
-      CPU_SUBTYPE_586            = 5,
-      CPU_SUBTYPE_PENT           = CPU_SUBTYPE_586,
-      CPU_SUBTYPE_PENTPRO        = 0x16,
-      CPU_SUBTYPE_PENTII_M3      = 0x36,
-      CPU_SUBTYPE_PENTII_M5      = 0x56,
-      CPU_SUBTYPE_CELERON        = 0x67,
-      CPU_SUBTYPE_CELERON_MOBILE = 0x77,
-      CPU_SUBTYPE_PENTIUM_3      = 0x08,
-      CPU_SUBTYPE_PENTIUM_3_M    = 0x18,
-      CPU_SUBTYPE_PENTIUM_3_XEON = 0x28,
-      CPU_SUBTYPE_PENTIUM_M      = 0x09,
-      CPU_SUBTYPE_PENTIUM_4      = 0x0a,
-      CPU_SUBTYPE_PENTIUM_4_M    = 0x1a,
-      CPU_SUBTYPE_ITANIUM        = 0x0b,
-      CPU_SUBTYPE_ITANIUM_2      = 0x1b,
-      CPU_SUBTYPE_XEON           = 0x0c,
-      CPU_SUBTYPE_XEON_MP        = 0x1c,
-
-      CPU_SUBTYPE_X86_ALL     = 3,
-      CPU_SUBTYPE_X86_64_ALL  = 3,
-      CPU_SUBTYPE_X86_ARCH1   = 4,
-      CPU_SUBTYPE_X86_64_H    = 8
-    };
-    static inline int CPU_SUBTYPE_INTEL(int Family, int Model) {
-      return Family | (Model << 4);
-    }
-    static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) {
-      return ((int)ST) & 0x0f;
-    }
-    static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) {
-      return ((int)ST) >> 4;
-    }
-    enum {
-      CPU_SUBTYPE_INTEL_FAMILY_MAX = 15,
-      CPU_SUBTYPE_INTEL_MODEL_ALL  = 0
-    };
-
-    enum CPUSubTypeARM {
-      CPU_SUBTYPE_ARM_ALL     = 0,
-      CPU_SUBTYPE_ARM_V4T     = 5,
-      CPU_SUBTYPE_ARM_V6      = 6,
-      CPU_SUBTYPE_ARM_V5      = 7,
-      CPU_SUBTYPE_ARM_V5TEJ   = 7,
-      CPU_SUBTYPE_ARM_XSCALE  = 8,
-      CPU_SUBTYPE_ARM_V7      = 9,
-      //  unused  ARM_V7F     = 10,
-      CPU_SUBTYPE_ARM_V7S     = 11,
-      CPU_SUBTYPE_ARM_V7K     = 12,
-      CPU_SUBTYPE_ARM_V6M     = 14,
-      CPU_SUBTYPE_ARM_V7M     = 15,
-      CPU_SUBTYPE_ARM_V7EM    = 16
-    };
-
-    enum CPUSubTypeARM64 {
-      CPU_SUBTYPE_ARM64_ALL   = 0
-    };
-
-    enum CPUSubTypeSPARC {
-      CPU_SUBTYPE_SPARC_ALL   = 0
-    };
-
-    enum CPUSubTypePowerPC {
-      CPU_SUBTYPE_POWERPC_ALL   = 0,
-      CPU_SUBTYPE_POWERPC_601   = 1,
-      CPU_SUBTYPE_POWERPC_602   = 2,
-      CPU_SUBTYPE_POWERPC_603   = 3,
-      CPU_SUBTYPE_POWERPC_603e  = 4,
-      CPU_SUBTYPE_POWERPC_603ev = 5,
-      CPU_SUBTYPE_POWERPC_604   = 6,
-      CPU_SUBTYPE_POWERPC_604e  = 7,
-      CPU_SUBTYPE_POWERPC_620   = 8,
-      CPU_SUBTYPE_POWERPC_750   = 9,
-      CPU_SUBTYPE_POWERPC_7400  = 10,
-      CPU_SUBTYPE_POWERPC_7450  = 11,
-      CPU_SUBTYPE_POWERPC_970   = 100,
-
-      CPU_SUBTYPE_MC980000_ALL  = CPU_SUBTYPE_POWERPC_ALL,
-      CPU_SUBTYPE_MC98601       = CPU_SUBTYPE_POWERPC_601
-    };
-
-    struct x86_thread_state32_t {
-      uint32_t eax;
-      uint32_t ebx;
-      uint32_t ecx;
-      uint32_t edx;
-      uint32_t edi;
-      uint32_t esi;
-      uint32_t ebp;
-      uint32_t esp;
-      uint32_t ss;
-      uint32_t eflags;
-      uint32_t eip;
-      uint32_t cs;
-      uint32_t ds;
-      uint32_t es;
-      uint32_t fs;
-      uint32_t gs;
-    };
-
-    struct x86_thread_state64_t {
-      uint64_t rax;
-      uint64_t rbx;
-      uint64_t rcx;
-      uint64_t rdx;
-      uint64_t rdi;
-      uint64_t rsi;
-      uint64_t rbp;
-      uint64_t rsp;
-      uint64_t r8;
-      uint64_t r9;
-      uint64_t r10;
-      uint64_t r11;
-      uint64_t r12;
-      uint64_t r13;
-      uint64_t r14;
-      uint64_t r15;
-      uint64_t rip;
-      uint64_t rflags;
-      uint64_t cs;
-      uint64_t fs;
-      uint64_t gs;
-    };
-
-    enum x86_fp_control_precis {
-      x86_FP_PREC_24B = 0,
-      x86_FP_PREC_53B = 2,
-      x86_FP_PREC_64B = 3
-    };
-
-    enum x86_fp_control_rc {
-      x86_FP_RND_NEAR = 0,
-      x86_FP_RND_DOWN = 1,
-      x86_FP_RND_UP = 2,
-      x86_FP_CHOP = 3
-    };
-
-    struct fp_control_t {
-      unsigned short
-       invalid :1,
-       denorm  :1,
-       zdiv    :1,
-       ovrfl   :1,
-       undfl   :1,
-       precis  :1,
-               :2,
-       pc      :2,
-       rc      :2,
-               :1,
-               :3;
-    };
-
-    struct fp_status_t {
-      unsigned short
-        invalid :1,
-        denorm  :1,
-        zdiv    :1,
-        ovrfl   :1,
-        undfl   :1,
-        precis  :1,
-        stkflt  :1,
-        errsumm :1,
-        c0      :1,
-        c1      :1,
-        c2      :1,
-        tos     :3,
-        c3      :1,
-        busy    :1;
-    };
-
-    struct mmst_reg_t {
-      char mmst_reg[10];
-      char mmst_rsrv[6];
-    };
-
-    struct xmm_reg_t {
-      char xmm_reg[16];
-    };
-
-    struct x86_float_state64_t {
-      int32_t fpu_reserved[2];
-      fp_control_t fpu_fcw;
-      fp_status_t fpu_fsw;
-      uint8_t fpu_ftw;
-      uint8_t fpu_rsrv1;
-      uint16_t fpu_fop;
-      uint32_t fpu_ip;
-      uint16_t fpu_cs;
-      uint16_t fpu_rsrv2;
-      uint32_t fpu_dp;
-      uint16_t fpu_ds;
-      uint16_t fpu_rsrv3;
-      uint32_t fpu_mxcsr;
-      uint32_t fpu_mxcsrmask;
-      mmst_reg_t fpu_stmm0;
-      mmst_reg_t fpu_stmm1;
-      mmst_reg_t fpu_stmm2;
-      mmst_reg_t fpu_stmm3;
-      mmst_reg_t fpu_stmm4;
-      mmst_reg_t fpu_stmm5;
-      mmst_reg_t fpu_stmm6;
-      mmst_reg_t fpu_stmm7;
-      xmm_reg_t fpu_xmm0;
-      xmm_reg_t fpu_xmm1;
-      xmm_reg_t fpu_xmm2;
-      xmm_reg_t fpu_xmm3;
-      xmm_reg_t fpu_xmm4;
-      xmm_reg_t fpu_xmm5;
-      xmm_reg_t fpu_xmm6;
-      xmm_reg_t fpu_xmm7;
-      xmm_reg_t fpu_xmm8;
-      xmm_reg_t fpu_xmm9;
-      xmm_reg_t fpu_xmm10;
-      xmm_reg_t fpu_xmm11;
-      xmm_reg_t fpu_xmm12;
-      xmm_reg_t fpu_xmm13;
-      xmm_reg_t fpu_xmm14;
-      xmm_reg_t fpu_xmm15;
-      char fpu_rsrv4[6*16];
-      uint32_t fpu_reserved1;
-    };
-
-    struct x86_exception_state64_t {
-      uint16_t trapno;
-      uint16_t cpu;
-      uint32_t err;
-      uint64_t faultvaddr;
-    };
-
-    inline void swapStruct(x86_thread_state32_t &x) {
-      sys::swapByteOrder(x.eax);
-      sys::swapByteOrder(x.ebx);
-      sys::swapByteOrder(x.ecx);
-      sys::swapByteOrder(x.edx);
-      sys::swapByteOrder(x.edi);
-      sys::swapByteOrder(x.esi);
-      sys::swapByteOrder(x.ebp);
-      sys::swapByteOrder(x.esp);
-      sys::swapByteOrder(x.ss);
-      sys::swapByteOrder(x.eflags);
-      sys::swapByteOrder(x.eip);
-      sys::swapByteOrder(x.cs);
-      sys::swapByteOrder(x.ds);
-      sys::swapByteOrder(x.es);
-      sys::swapByteOrder(x.fs);
-      sys::swapByteOrder(x.gs);
-    }
-
-    inline void swapStruct(x86_thread_state64_t &x) {
-      sys::swapByteOrder(x.rax);
-      sys::swapByteOrder(x.rbx);
-      sys::swapByteOrder(x.rcx);
-      sys::swapByteOrder(x.rdx);
-      sys::swapByteOrder(x.rdi);
-      sys::swapByteOrder(x.rsi);
-      sys::swapByteOrder(x.rbp);
-      sys::swapByteOrder(x.rsp);
-      sys::swapByteOrder(x.r8);
-      sys::swapByteOrder(x.r9);
-      sys::swapByteOrder(x.r10);
-      sys::swapByteOrder(x.r11);
-      sys::swapByteOrder(x.r12);
-      sys::swapByteOrder(x.r13);
-      sys::swapByteOrder(x.r14);
-      sys::swapByteOrder(x.r15);
-      sys::swapByteOrder(x.rip);
-      sys::swapByteOrder(x.rflags);
-      sys::swapByteOrder(x.cs);
-      sys::swapByteOrder(x.fs);
-      sys::swapByteOrder(x.gs);
-    }
-
-    inline void swapStruct(x86_float_state64_t &x) {
-      sys::swapByteOrder(x.fpu_reserved[0]);
-      sys::swapByteOrder(x.fpu_reserved[1]);
-      // TODO swap: fp_control_t fpu_fcw;
-      // TODO swap: fp_status_t fpu_fsw;
-      sys::swapByteOrder(x.fpu_fop);
-      sys::swapByteOrder(x.fpu_ip);
-      sys::swapByteOrder(x.fpu_cs);
-      sys::swapByteOrder(x.fpu_rsrv2);
-      sys::swapByteOrder(x.fpu_dp);
-      sys::swapByteOrder(x.fpu_ds);
-      sys::swapByteOrder(x.fpu_rsrv3);
-      sys::swapByteOrder(x.fpu_mxcsr);
-      sys::swapByteOrder(x.fpu_mxcsrmask);
-      sys::swapByteOrder(x.fpu_reserved1);
-    }
-
-    inline void swapStruct(x86_exception_state64_t &x) {
-      sys::swapByteOrder(x.trapno);
-      sys::swapByteOrder(x.cpu);
-      sys::swapByteOrder(x.err);
-      sys::swapByteOrder(x.faultvaddr);
-    }
-
-    struct x86_state_hdr_t {
-      uint32_t flavor;
-      uint32_t count;
-    };
-
-    struct x86_thread_state_t {
-      x86_state_hdr_t tsh;
-      union {
-        x86_thread_state64_t ts64;
-        x86_thread_state32_t ts32;
-      } uts;
-    };
-
-    struct x86_float_state_t {
-      x86_state_hdr_t fsh;
-      union {
-        x86_float_state64_t fs64;
-      } ufs;
-    };
-
-    struct x86_exception_state_t {
-      x86_state_hdr_t esh;
-      union {
-        x86_exception_state64_t es64;
-      } ues;
-    };
-
-    inline void swapStruct(x86_state_hdr_t &x) {
-      sys::swapByteOrder(x.flavor);
-      sys::swapByteOrder(x.count);
-    }
-
-    enum X86ThreadFlavors {
-      x86_THREAD_STATE32    = 1,
-      x86_FLOAT_STATE32     = 2,
-      x86_EXCEPTION_STATE32 = 3,
-      x86_THREAD_STATE64    = 4,
-      x86_FLOAT_STATE64     = 5,
-      x86_EXCEPTION_STATE64 = 6,
-      x86_THREAD_STATE      = 7,
-      x86_FLOAT_STATE       = 8,
-      x86_EXCEPTION_STATE   = 9,
-      x86_DEBUG_STATE32     = 10,
-      x86_DEBUG_STATE64     = 11,
-      x86_DEBUG_STATE       = 12
-    };
-
-    inline void swapStruct(x86_thread_state_t &x) {
-      swapStruct(x.tsh);
-      if (x.tsh.flavor == x86_THREAD_STATE64)
-        swapStruct(x.uts.ts64);
-    }
-
-    inline void swapStruct(x86_float_state_t &x) {
-      swapStruct(x.fsh);
-      if (x.fsh.flavor == x86_FLOAT_STATE64)
-        swapStruct(x.ufs.fs64);
-    }
-
-    inline void swapStruct(x86_exception_state_t &x) {
-      swapStruct(x.esh);
-      if (x.esh.flavor == x86_EXCEPTION_STATE64)
-        swapStruct(x.ues.es64);
-    }
-
-    const uint32_t x86_THREAD_STATE32_COUNT =
-      sizeof(x86_thread_state32_t) / sizeof(uint32_t);
-
-    const uint32_t x86_THREAD_STATE64_COUNT =
-      sizeof(x86_thread_state64_t) / sizeof(uint32_t);
-    const uint32_t x86_FLOAT_STATE64_COUNT =
-      sizeof(x86_float_state64_t) / sizeof(uint32_t);
-    const uint32_t x86_EXCEPTION_STATE64_COUNT =
-      sizeof(x86_exception_state64_t) / sizeof(uint32_t);
-
-    const uint32_t x86_THREAD_STATE_COUNT =
-      sizeof(x86_thread_state_t) / sizeof(uint32_t);
-    const uint32_t x86_FLOAT_STATE_COUNT =
-      sizeof(x86_float_state_t) / sizeof(uint32_t);
-    const uint32_t x86_EXCEPTION_STATE_COUNT =
-      sizeof(x86_exception_state_t) / sizeof(uint32_t);
-
-    struct arm_thread_state32_t {
-      uint32_t r[13];
-      uint32_t sp;
-      uint32_t lr;
-      uint32_t pc;
-      uint32_t cpsr;
-    };
-
-    inline void swapStruct(arm_thread_state32_t &x) {
-      for (int i = 0; i < 13; i++)
-        sys::swapByteOrder(x.r[i]);
-      sys::swapByteOrder(x.sp);
-      sys::swapByteOrder(x.lr);
-      sys::swapByteOrder(x.pc);
-      sys::swapByteOrder(x.cpsr);
-    }
-
-    struct arm_thread_state64_t {
-      uint64_t x[29];
-      uint64_t fp;
-      uint64_t lr;
-      uint64_t sp;
-      uint64_t pc;
-      uint32_t cpsr;
-      uint32_t pad;
-    };
-
-    inline void swapStruct(arm_thread_state64_t &x) {
-      for (int i = 0; i < 29; i++)
-        sys::swapByteOrder(x.x[i]);
-      sys::swapByteOrder(x.fp);
-      sys::swapByteOrder(x.lr);
-      sys::swapByteOrder(x.sp);
-      sys::swapByteOrder(x.pc);
-      sys::swapByteOrder(x.cpsr);
-    }
-
-    struct arm_state_hdr_t {
-      uint32_t flavor;
-      uint32_t count;
-    };
-
-    struct arm_thread_state_t {
-      arm_state_hdr_t tsh;
-      union {
-        arm_thread_state32_t ts32;
-      } uts;
-    };
-
-    inline void swapStruct(arm_state_hdr_t &x) {
-      sys::swapByteOrder(x.flavor);
-      sys::swapByteOrder(x.count);
-    }
-
-    enum ARMThreadFlavors {
-      ARM_THREAD_STATE      = 1,
-      ARM_VFP_STATE         = 2,
-      ARM_EXCEPTION_STATE   = 3,
-      ARM_DEBUG_STATE       = 4,
-      ARN_THREAD_STATE_NONE = 5,
-      ARM_THREAD_STATE64    = 6,
-      ARM_EXCEPTION_STATE64 = 7
-    };
-
-    inline void swapStruct(arm_thread_state_t &x) {
-      swapStruct(x.tsh);
-      if (x.tsh.flavor == ARM_THREAD_STATE)
-        swapStruct(x.uts.ts32);
-    }
-
-    const uint32_t ARM_THREAD_STATE_COUNT =
-      sizeof(arm_thread_state32_t) / sizeof(uint32_t);
-
-    const uint32_t ARM_THREAD_STATE64_COUNT =
-      sizeof(arm_thread_state64_t) / sizeof(uint32_t);
-
-    struct ppc_thread_state32_t {
-      uint32_t srr0;
-      uint32_t srr1;
-      uint32_t r0;
-      uint32_t r1;
-      uint32_t r2;
-      uint32_t r3;
-      uint32_t r4;
-      uint32_t r5;
-      uint32_t r6;
-      uint32_t r7;
-      uint32_t r8;
-      uint32_t r9;
-      uint32_t r10;
-      uint32_t r11;
-      uint32_t r12;
-      uint32_t r13;
-      uint32_t r14;
-      uint32_t r15;
-      uint32_t r16;
-      uint32_t r17;
-      uint32_t r18;
-      uint32_t r19;
-      uint32_t r20;
-      uint32_t r21;
-      uint32_t r22;
-      uint32_t r23;
-      uint32_t r24;
-      uint32_t r25;
-      uint32_t r26;
-      uint32_t r27;
-      uint32_t r28;
-      uint32_t r29;
-      uint32_t r30;
-      uint32_t r31;
-      uint32_t ct;
-      uint32_t xer;
-      uint32_t lr;
-      uint32_t ctr;
-      uint32_t mq;
-      uint32_t vrsave;
-    };
-
-    inline void swapStruct(ppc_thread_state32_t &x) {
-      sys::swapByteOrder(x.srr0);
-      sys::swapByteOrder(x.srr1);
-      sys::swapByteOrder(x.r0);
-      sys::swapByteOrder(x.r1);
-      sys::swapByteOrder(x.r2);
-      sys::swapByteOrder(x.r3);
-      sys::swapByteOrder(x.r4);
-      sys::swapByteOrder(x.r5);
-      sys::swapByteOrder(x.r6);
-      sys::swapByteOrder(x.r7);
-      sys::swapByteOrder(x.r8);
-      sys::swapByteOrder(x.r9);
-      sys::swapByteOrder(x.r10);
-      sys::swapByteOrder(x.r11);
-      sys::swapByteOrder(x.r12);
-      sys::swapByteOrder(x.r13);
-      sys::swapByteOrder(x.r14);
-      sys::swapByteOrder(x.r15);
-      sys::swapByteOrder(x.r16);
-      sys::swapByteOrder(x.r17);
-      sys::swapByteOrder(x.r18);
-      sys::swapByteOrder(x.r19);
-      sys::swapByteOrder(x.r20);
-      sys::swapByteOrder(x.r21);
-      sys::swapByteOrder(x.r22);
-      sys::swapByteOrder(x.r23);
-      sys::swapByteOrder(x.r24);
-      sys::swapByteOrder(x.r25);
-      sys::swapByteOrder(x.r26);
-      sys::swapByteOrder(x.r27);
-      sys::swapByteOrder(x.r28);
-      sys::swapByteOrder(x.r29);
-      sys::swapByteOrder(x.r30);
-      sys::swapByteOrder(x.r31);
-      sys::swapByteOrder(x.ct);
-      sys::swapByteOrder(x.xer);
-      sys::swapByteOrder(x.lr);
-      sys::swapByteOrder(x.ctr);
-      sys::swapByteOrder(x.mq);
-      sys::swapByteOrder(x.vrsave);
-    }
-
-    struct ppc_state_hdr_t {
-      uint32_t flavor;
-      uint32_t count;
-    };
-
-    struct ppc_thread_state_t {
-      ppc_state_hdr_t tsh;
-      union {
-        ppc_thread_state32_t ts32;
-      } uts;
-    };
-
-    inline void swapStruct(ppc_state_hdr_t &x) {
-      sys::swapByteOrder(x.flavor);
-      sys::swapByteOrder(x.count);
-    }
-
-    enum PPCThreadFlavors {
-      PPC_THREAD_STATE      = 1,
-      PPC_FLOAT_STATE       = 2,
-      PPC_EXCEPTION_STATE   = 3,
-      PPC_VECTOR_STATE      = 4,
-      PPC_THREAD_STATE64    = 5,
-      PPC_EXCEPTION_STATE64 = 6,
-      PPC_THREAD_STATE_NONE = 7
-    };
-
-    inline void swapStruct(ppc_thread_state_t &x) {
-      swapStruct(x.tsh);
-      if (x.tsh.flavor == PPC_THREAD_STATE)
-        swapStruct(x.uts.ts32);
-    }
-
-    const uint32_t PPC_THREAD_STATE_COUNT =
-      sizeof(ppc_thread_state32_t) / sizeof(uint32_t);
-
-    // Define a union of all load command structs
-    #define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data;
-
-    union macho_load_command {
-      #include "llvm/Support/MachO.def"
-    };
-
-  } // end namespace MachO
-} // end namespace llvm
-
-#endif
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 7f07e8cc3a51..bb840380d4d3 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -20,8 +20,8 @@
 #include <cassert>
 #include <climits>
 #include <cstring>
-#include <type_traits>
 #include <limits>
+#include <type_traits>
 
 #ifdef _MSC_VER
 #include <intrin.h>
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index e8bdc3e89fa7..73f0251a6b6e 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -14,14 +14,14 @@
 #ifndef LLVM_SUPPORT_MEMORYBUFFER_H
 #define LLVM_SUPPORT_MEMORYBUFFER_H
 
+#include "llvm-c/Types.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/ErrorOr.h"
-#include "llvm-c/Types.h"
-#include <memory>
 #include <cstddef>
 #include <cstdint>
+#include <memory>
 
 namespace llvm {
 
diff --git a/include/llvm/Support/Solaris.h b/include/llvm/Support/Solaris.h
index b08228532489..88d83014c468 100644
--- a/include/llvm/Support/Solaris.h
+++ b/include/llvm/Support/Solaris.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_SUPPORT_SOLARIS_H
 #define LLVM_SUPPORT_SOLARIS_H
 
-#include <sys/types.h>
 #include <sys/regset.h>
+#include <sys/types.h>
 
 /* Solaris doesn't have endian.h. SPARC is the only supported big-endian ISA. */
 #define BIG_ENDIAN 4321
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index cb90d968c44c..399f8dcd76fc 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -49,7 +49,7 @@ public:
   /// Clients that want to handle their own diagnostics in a custom way can
   /// register a function pointer+context as a diagnostic handler.
   /// It gets called each time PrintMessage is invoked.
-  typedef void (*DiagHandlerTy)(const SMDiagnostic &, void *Context);
+  using DiagHandlerTy = void (*)(const SMDiagnostic &, void *Context);
 
 private:
   struct SrcBuffer {
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index 2ec0c3b76c11..bb5fd07f0d00 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -1,4 +1,4 @@
-//===-- StringPool.h - Interned string pool ---------------------*- C++ -*-===//
+//===- StringPool.h - Interned string pool ----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -30,6 +30,7 @@
 #define LLVM_SUPPORT_STRINGPOOL_H
 
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include <cassert>
 
 namespace llvm {
@@ -43,17 +44,17 @@ namespace llvm {
     /// PooledString - This is the value of an entry in the pool's interning
     /// table.
     struct PooledString {
-      StringPool *Pool;  ///< So the string can remove itself.
-      unsigned Refcount; ///< Number of referencing PooledStringPtrs.
+      StringPool *Pool = nullptr;  ///< So the string can remove itself.
+      unsigned Refcount = 0;       ///< Number of referencing PooledStringPtrs.
 
     public:
-      PooledString() : Pool(nullptr), Refcount(0) { }
+      PooledString() = default;
     };
 
     friend class PooledStringPtr;
 
-    typedef StringMap<PooledString> table_t;
-    typedef StringMapEntry<PooledString> entry_t;
+    using table_t = StringMap<PooledString>;
+    using entry_t = StringMapEntry<PooledString>;
     table_t InternTable;
 
   public:
@@ -76,11 +77,12 @@ namespace llvm {
   /// a single pointer, but it does have reference-counting overhead when
   /// copied.
   class PooledStringPtr {
-    typedef StringPool::entry_t entry_t;
-    entry_t *S;
+    using entry_t = StringPool::entry_t;
+
+    entry_t *S = nullptr;
 
   public:
-    PooledStringPtr() : S(nullptr) {}
+    PooledStringPtr() = default;
 
     explicit PooledStringPtr(entry_t *E) : S(E) {
       if (S) ++S->getValue().Refcount;
@@ -133,6 +135,6 @@ namespace llvm {
     inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; }
   };
 
-} // End llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_SUPPORT_STRINGPOOL_H
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index bd68d2414487..9e9a91b0abda 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -20,10 +20,10 @@
 #define LLVM_SUPPORT_TARGETREGISTRY_H
 
 #include "llvm-c/Disassembler.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
@@ -598,7 +598,7 @@ struct TargetRegistry {
 
   /// printRegisteredTargetsForVersion - Print the registered targets
   /// appropriately for inclusion in a tool's version output.
-  static void printRegisteredTargetsForVersion();
+  static void printRegisteredTargetsForVersion(raw_ostream &OS);
 
   /// @name Registry Access
   /// @{
diff --git a/include/llvm/Support/raw_sha1_ostream.h b/include/llvm/Support/raw_sha1_ostream.h
index 329ef9fd069b..bd55d98b7c1d 100644
--- a/include/llvm/Support/raw_sha1_ostream.h
+++ b/include/llvm/Support/raw_sha1_ostream.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_SUPPORT_RAW_SHA1_OSTREAM_H
 #define LLVM_SUPPORT_RAW_SHA1_OSTREAM_H
 
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/SHA1.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/SHA1.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index ce4bbf8cb2cc..cc0878358800 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -14,11 +14,10 @@
 #ifndef LLVM_SUPPORT_TYPE_TRAITS_H
 #define LLVM_SUPPORT_TYPE_TRAITS_H
 
+#include "llvm/Support/Compiler.h"
 #include <type_traits>
 #include <utility>
 
-#include "llvm/Support/Compiler.h"
-
 #ifndef __has_feature
 #define LLVM_DEFINED_HAS_FEATURE
 #define __has_feature(x) 0
@@ -51,7 +50,7 @@ struct isPodLike {
 
 // std::pair's are pod-like if their elements are.
 template<typename T, typename U>
-struct isPodLike<std::pair<T, U> > {
+struct isPodLike<std::pair<T, U>> {
   static const bool value = isPodLike<T>::value && isPodLike<U>::value;
 };
 
@@ -63,7 +62,7 @@ struct isPodLike<std::pair<T, U> > {
 /// Also note that enum classes aren't implicitly convertible to integral types,
 /// the value may therefore need to be explicitly converted before being used.
 template <typename T> class is_integral_or_enum {
-  typedef typename std::remove_reference<T>::type UnderlyingT;
+  using UnderlyingT = typename std::remove_reference<T>::type;
 
 public:
   static const bool value =
@@ -76,23 +75,23 @@ public:
 
 /// \brief If T is a pointer, just return it. If it is not, return T&.
 template<typename T, typename Enable = void>
-struct add_lvalue_reference_if_not_pointer { typedef T &type; };
+struct add_lvalue_reference_if_not_pointer { using type = T &; };
 
 template <typename T>
 struct add_lvalue_reference_if_not_pointer<
     T, typename std::enable_if<std::is_pointer<T>::value>::type> {
-  typedef T type;
+  using type = T;
 };
 
 /// \brief If T is a pointer to X, return a pointer to const X. If it is not,
 /// return const T.
 template<typename T, typename Enable = void>
-struct add_const_past_pointer { typedef const T type; };
+struct add_const_past_pointer { using type = const T; };
 
 template <typename T>
 struct add_const_past_pointer<
     T, typename std::enable_if<std::is_pointer<T>::value>::type> {
-  typedef const typename std::remove_pointer<T>::type *type;
+  using type = const typename std::remove_pointer<T>::type *;
 };
 
 template <typename T, typename Enable = void>
@@ -104,7 +103,8 @@ struct const_pointer_or_const_ref<
     T, typename std::enable_if<std::is_pointer<T>::value>::type> {
   using type = typename add_const_past_pointer<T>::type;
 };
-}
+
+} // end namespace llvm
 
 // If the compiler supports detecting whether a class is final, define
 // an LLVM_IS_FINAL macro. If it cannot be defined properly, this
@@ -119,4 +119,4 @@ struct const_pointer_or_const_ref<
 #undef __has_feature
 #endif
 
-#endif
+#endif // LLVM_SUPPORT_TYPE_TRAITS_H
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 97a6f0c6e3ae..7595d4339810 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -16,13 +16,13 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 7258a5cc2d89..a9d67228d205 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -677,6 +677,16 @@ public:
                                   unsigned &NumIntermediates,
                                   MVT &RegisterVT) const;
 
+  /// Certain targets such as MIPS require that some types such as vectors are
+  /// always broken down into scalars in some contexts. This occurs even if the
+  /// vector type is legal.
+  virtual unsigned getVectorTypeBreakdownForCallingConv(
+      LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+      unsigned &NumIntermediates, MVT &RegisterVT) const {
+    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
+                                  RegisterVT);
+  }
+
   struct IntrinsicInfo {
     unsigned     opc = 0;          // target opcode
     EVT          memVT;            // memory VT
@@ -1085,6 +1095,33 @@ public:
     llvm_unreachable("Unsupported extended type!");
   }
 
+  /// Certain combinations of ABIs, Targets and features require that types
+  /// are legal for some operations and not for other operations.
+  /// For MIPS all vector types must be passed through the integer register set.
+  virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
+    return getRegisterType(VT);
+  }
+
+  virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                            EVT VT) const {
+    return getRegisterType(Context, VT);
+  }
+
+  /// Certain targets require unusual breakdowns of certain types. For MIPS,
+  /// this occurs when a vector type is used, as vectors are passed through the
+  /// integer register set.
+  virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                 EVT VT) const {
+    return getNumRegisters(Context, VT);
+  }
+
+  /// Certain targets have context-sensitive alignment requirements, where one
+  /// type has the alignment requirement of another type.
+  virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
+                                                 DataLayout DL) const {
+    return DL.getABITypeAlignment(ArgTy);
+  }
+
   /// If true, then instruction selection should seek to shrink the FP constant
   /// of the specified type to a smaller type in order to save space and / or
   /// reduce runtime.
@@ -1876,6 +1913,38 @@ public:
     return false;
   }
 
+  /// Returns true if the opcode is a commutative binary operation.
+  virtual bool isCommutativeBinOp(unsigned Opcode) const {
+    // FIXME: This should get its info from the td file.
+    switch (Opcode) {
+    case ISD::ADD:
+    case ISD::SMIN:
+    case ISD::SMAX:
+    case ISD::UMIN:
+    case ISD::UMAX:
+    case ISD::MUL:
+    case ISD::MULHU:
+    case ISD::MULHS:
+    case ISD::SMUL_LOHI:
+    case ISD::UMUL_LOHI:
+    case ISD::FADD:
+    case ISD::FMUL:
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+    case ISD::SADDO:
+    case ISD::UADDO:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::FMINNUM:
+    case ISD::FMAXNUM:
+    case ISD::FMINNAN:
+    case ISD::FMAXNAN:
+      return true;
+    default: return false;
+    }
+  }
+
   /// Return true if it's free to truncate a value of type FromTy to type
   /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
   /// by referencing its sub-register AX.
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index ed390799cfc3..933c6c87b0be 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -25,7 +25,6 @@
 namespace llvm {
 
 class GlobalValue;
-class MachineFunctionInitializer;
 class Mangler;
 class MCAsmInfo;
 class MCContext;
@@ -227,8 +226,7 @@ public:
       PassManagerBase &, raw_pwrite_stream &, CodeGenFileType,
       bool /*DisableVerify*/ = true, AnalysisID /*StartBefore*/ = nullptr,
       AnalysisID /*StartAfter*/ = nullptr, AnalysisID /*StopBefore*/ = nullptr,
-      AnalysisID /*StopAfter*/ = nullptr,
-      MachineFunctionInitializer * /*MFInitializer*/ = nullptr) {
+      AnalysisID /*StopAfter*/ = nullptr) {
     return true;
   }
 
@@ -289,8 +287,7 @@ public:
       PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
       bool DisableVerify = true, AnalysisID StartBefore = nullptr,
       AnalysisID StartAfter = nullptr, AnalysisID StopBefore = nullptr,
-      AnalysisID StopAfter = nullptr,
-      MachineFunctionInitializer *MFInitializer = nullptr) override;
+      AnalysisID StopAfter = nullptr) override;
 
   /// Add passes to the specified pass manager to get machine code emitted with
   /// the MCJIT. This method returns true if machine code is not supported. It
@@ -305,6 +302,11 @@ public:
   /// remove this at some point and always enable the verifier when
   /// EXPENSIVE_CHECKS is enabled.
   virtual bool isMachineVerifierClean() const { return true; }
+
+  /// \brief Adds an AsmPrinter pass to the pipeline that prints assembly or
+  /// machine code from the MI representation.
+  bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out,
+                     CodeGenFileType FileTYpe, MCContext &Context);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 7cc33f2fdccb..5c2063880f8b 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -105,10 +105,10 @@ namespace llvm {
           HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
           GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
           EnableFastISel(false), UseInitArray(false),
-          DisableIntegratedAS(false), CompressDebugSections(false),
-          RelaxELFRelocations(false), FunctionSections(false),
-          DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
-          EmulatedTLS(false), EnableIPRA(false) {}
+          DisableIntegratedAS(false), RelaxELFRelocations(false),
+          FunctionSections(false), DataSections(false),
+          UniqueSectionNames(true), TrapUnreachable(false), EmulatedTLS(false),
+          EnableIPRA(false) {}
 
     /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
     /// option is specified on the command line, and should enable debugging
@@ -194,7 +194,7 @@ namespace llvm {
     unsigned DisableIntegratedAS : 1;
 
     /// Compress DWARF debug sections.
-    unsigned CompressDebugSections : 1;
+    DebugCompressionType CompressDebugSections = DebugCompressionType::None;
 
     unsigned RelaxELFRelocations : 1;
 
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 83950a9cd027..9cb07a5c6dae 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -18,8 +18,8 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/PBQPRAConstraint.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/CodeGen.h"
diff --git a/include/llvm/Transforms/IPO/FunctionAttrs.h b/include/llvm/Transforms/IPO/FunctionAttrs.h
index 85d6364c8bbc..36dd06b85b41 100644
--- a/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
 #define LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
 
-#include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h
index 324ebca46de2..008341304995 100644
--- a/include/llvm/Transforms/Scalar/GVNExpression.h
+++ b/include/llvm/Transforms/Scalar/GVNExpression.h
@@ -93,6 +93,11 @@ public:
   }
 
   virtual bool equals(const Expression &Other) const { return true; }
+  // Return true if the two expressions are exactly the same, including the
+  // normally ignored fields.
+  virtual bool exactlyEquals(const Expression &Other) const {
+    return getExpressionType() == Other.getExpressionType() && equals(Other);
+  }
 
   unsigned getOpcode() const { return Opcode; }
   void setOpcode(unsigned opcode) { Opcode = opcode; }
@@ -345,6 +350,10 @@ public:
   void setAlignment(unsigned Align) { Alignment = Align; }
 
   bool equals(const Expression &Other) const override;
+  bool exactlyEquals(const Expression &Other) const override {
+    return Expression::exactlyEquals(Other) &&
+           cast<LoadExpression>(Other).getLoadInst() == getLoadInst();
+  }
 
   //
   // Debugging support
@@ -382,6 +391,10 @@ public:
   Value *getStoredValue() const { return StoredValue; }
 
   bool equals(const Expression &Other) const override;
+  bool exactlyEquals(const Expression &Other) const override {
+    return Expression::exactlyEquals(Other) &&
+           cast<StoreExpression>(Other).getStoreInst() == getStoreInst();
+  }
 
   // Debugging support
   //
diff --git a/include/llvm/Transforms/Utils/EscapeEnumerator.h b/include/llvm/Transforms/Utils/EscapeEnumerator.h
index 80d16ed4cf5b..1256dfdaca17 100644
--- a/include/llvm/Transforms/Utils/EscapeEnumerator.h
+++ b/include/llvm/Transforms/Utils/EscapeEnumerator.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H
 #define LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H
 
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Transforms/Utils/FunctionComparator.h b/include/llvm/Transforms/Utils/FunctionComparator.h
index ee58d1d138f7..b0f10eafaa95 100644
--- a/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -19,8 +19,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/ValueMap.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueMap.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
 #include <cstdint>
diff --git a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
index bb7fa523cb19..b7a3d130aa11 100644
--- a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
+++ b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
@@ -14,8 +14,8 @@
 #define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
 
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include <string>
 #include <vector>
 
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 8942111307ff..8fed292e77a3 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -15,13 +15,13 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H
 #define LLVM_TRANSFORMS_UTILS_LOCAL_H
 
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/ADT/SmallPtrSet.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Transforms/Utils/LoopVersioning.h b/include/llvm/Transforms/Utils/LoopVersioning.h
index 0d345a972e10..fa5d7845d080 100644
--- a/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -18,8 +18,8 @@
 
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Transforms/Utils/OrderedInstructions.h b/include/llvm/Transforms/Utils/OrderedInstructions.h
new file mode 100644
index 000000000000..e043ff39a998
--- /dev/null
+++ b/include/llvm/Transforms/Utils/OrderedInstructions.h
@@ -0,0 +1,51 @@
+//===- llvm/Transforms/Utils/OrderedInstructions.h -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an efficient way to check for dominance relation between 2
+// instructions.
+//
+// This interface dispatches to appropriate dominance check given 2
+// instructions, i.e. in case the instructions are in the same basic block,
+// OrderedBasicBlock (with instruction numbering and caching) is used.
+// Otherwise, dominator tree is used.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
+#define LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Operator.h"
+
+namespace llvm {
+
+class OrderedInstructions {
+  /// Used to check dominance for instructions in same basic block.
+  mutable DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>>
+      OBBMap;
+
+  /// The dominator tree of the parent function.
+  DominatorTree *DT;
+
+public:
+  /// Constructor.
+  OrderedInstructions(DominatorTree *DT) : DT(DT) {}
+
+  /// Return true if first instruction dominates the second.
+  bool dominates(const Instruction *, const Instruction *) const;
+
+  /// Invalidate the OrderedBasicBlock cache when its basic block changes.
+  void invalidateBlock(BasicBlock *BB) { OBBMap.erase(BB); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index e44dc437342d..0cc6b34d4593 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -16,8 +16,8 @@
 #define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/IR/ValueMap.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index c514db41623c..6f258191e89e 100644
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -84,7 +84,7 @@ private:
                           ArrayRef<Value *> BuildVector = None,
                           bool AllowReorder = false);
 
-  /// \brief Try to vectorize a chain that may start at the operands of \V;
+  /// \brief Try to vectorize a chain that may start at the operands of \p V.
   bool tryToVectorize(BinaryOperator *V, slpvectorizer::BoUpSLP &R);
 
   /// \brief Vectorize the store instructions collected in Stores.
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index e0780885d159..d906b05f7aaa 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -38,6 +38,31 @@ module LLVM_Backend {
 }
 
 module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } }
+
+module LLVM_BinaryFormat {
+    requires cplusplus
+    umbrella "BinaryFormat" module * { export * }
+    textual header "BinaryFormat/Dwarf.def"
+    textual header "BinaryFormat/MachO.def"
+    textual header "BinaryFormat/ELFRelocs/AArch64.def"
+    textual header "BinaryFormat/ELFRelocs/AMDGPU.def"
+    textual header "BinaryFormat/ELFRelocs/ARM.def"
+    textual header "BinaryFormat/ELFRelocs/AVR.def"
+    textual header "BinaryFormat/ELFRelocs/BPF.def"
+    textual header "BinaryFormat/ELFRelocs/Hexagon.def"
+    textual header "BinaryFormat/ELFRelocs/i386.def"
+    textual header "BinaryFormat/ELFRelocs/Lanai.def"
+    textual header "BinaryFormat/ELFRelocs/Mips.def"
+    textual header "BinaryFormat/ELFRelocs/PowerPC64.def"
+    textual header "BinaryFormat/ELFRelocs/PowerPC.def"
+    textual header "BinaryFormat/ELFRelocs/RISCV.def"
+    textual header "BinaryFormat/ELFRelocs/Sparc.def"
+    textual header "BinaryFormat/ELFRelocs/SystemZ.def"
+    textual header "BinaryFormat/ELFRelocs/x86_64.def"
+    textual header "BinaryFormat/ELFRelocs/WebAssembly.def"
+    textual header "BinaryFormat/WasmRelocs/WebAssembly.def"
+}
+
 module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } }
 
 module LLVM_DebugInfo {
@@ -259,25 +284,6 @@ module LLVM_Utils {
     // These are intended for textual inclusion.
     textual header "Support/ARMTargetParser.def"
     textual header "Support/AArch64TargetParser.def"
-    textual header "Support/Dwarf.def"
-    textual header "Support/MachO.def"
-    textual header "Support/ELFRelocs/AArch64.def"
-    textual header "Support/ELFRelocs/AMDGPU.def"
-    textual header "Support/ELFRelocs/ARM.def"
-    textual header "Support/ELFRelocs/AVR.def"
-    textual header "Support/ELFRelocs/BPF.def"
-    textual header "Support/ELFRelocs/Hexagon.def"
-    textual header "Support/ELFRelocs/i386.def"
-    textual header "Support/ELFRelocs/Lanai.def"
-    textual header "Support/ELFRelocs/Mips.def"
-    textual header "Support/ELFRelocs/PowerPC64.def"
-    textual header "Support/ELFRelocs/PowerPC.def"
-    textual header "Support/ELFRelocs/RISCV.def"
-    textual header "Support/ELFRelocs/Sparc.def"
-    textual header "Support/ELFRelocs/SystemZ.def"
-    textual header "Support/ELFRelocs/x86_64.def"
-    textual header "Support/ELFRelocs/WebAssembly.def"
-    textual header "Support/WasmRelocs/WebAssembly.def"
   }
 
   // This part of the module is usable from both C and C++ code.
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 4d6a6c9a30aa..435c782d97a5 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -14,9 +14,9 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 16b711a69ec3..ee17ad3ba586 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -17,8 +17,8 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 267e19adfe4d..23d5a887c34a 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -30,6 +31,7 @@ using namespace llvm;
 INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
                       "Branch Probability Analysis", false, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
                     "Branch Probability Analysis", false, true)
 
@@ -457,7 +459,8 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
   return true;
 }
 
-bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) {
+bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
+                                               const TargetLibraryInfo *TLI) {
   const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
   if (!BI || !BI->isConditional())
     return false;
@@ -480,8 +483,37 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) {
         if (AndRHS->getUniqueInteger().isPowerOf2())
           return false;
 
+  // Check if the LHS is the return value of a library function
+  LibFunc Func = NumLibFuncs;
+  if (TLI)
+    if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
+      if (Function *CalledFn = Call->getCalledFunction())
+        TLI->getLibFunc(*CalledFn, Func);
+
   bool isProb;
-  if (CV->isZero()) {
+  if (Func == LibFunc_strcasecmp ||
+      Func == LibFunc_strcmp ||
+      Func == LibFunc_strncasecmp ||
+      Func == LibFunc_strncmp ||
+      Func == LibFunc_memcmp) {
+    // strcmp and similar functions return zero, negative, or positive, if the
+    // first string is equal, less, or greater than the second. We consider it
+    // likely that the strings are not equal, so a comparison with zero is
+    // probably false, and a comparison with any other number is also
+    // probably false given that what exactly is returned for nonzero values is
+    // not specified. Any kind of comparison other than equality we know
+    // nothing about.
+    switch (CI->getPredicate()) {
+    case CmpInst::ICMP_EQ:
+      isProb = false;
+      break;
+    case CmpInst::ICMP_NE:
+      isProb = true;
+      break;
+    default:
+      return false;
+    }
+  } else if (CV->isZero()) {
     switch (CI->getPredicate()) {
     case CmpInst::ICMP_EQ:
       // X == 0   ->  Unlikely
@@ -707,7 +739,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
   }
 }
 
-void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
+void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
+                                      const TargetLibraryInfo *TLI) {
   DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
                << " ----\n\n");
   LastF = &F; // Store the last function we ran on for printing.
@@ -733,7 +766,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
       continue;
     if (calcPointerHeuristics(BB))
       continue;
-    if (calcZeroHeuristics(BB))
+    if (calcZeroHeuristics(BB, TLI))
       continue;
     if (calcFloatingPointHeuristics(BB))
       continue;
@@ -747,12 +780,14 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
 void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
     AnalysisUsage &AU) const {
   AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
 bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
   const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  BPI.calculate(F, LI);
+  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  BPI.calculate(F, LI, &TLI);
   return false;
 }
 
@@ -767,7 +802,7 @@ AnalysisKey BranchProbabilityAnalysis::Key;
 BranchProbabilityInfo
 BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
   BranchProbabilityInfo BPI;
-  BPI.calculate(F, AM.getResult<LoopAnalysis>(F));
+  BPI.calculate(F, AM.getResult<LoopAnalysis>(F), &AM.getResult<TargetLibraryAnalysis>(F));
   return BPI;
 }
 
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index 54782b6bd4ad..95874b88244b 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -16,7 +16,6 @@
 #define LLVM_ANALYSIS_CFLGRAPH_H
 
 #include "AliasAnalysisSummary.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/Instructions.h"
diff --git a/lib/Analysis/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp
index af942e9ed3e9..e7017e77652a 100644
--- a/lib/Analysis/CallPrinter.cpp
+++ b/lib/Analysis/CallPrinter.cpp
@@ -14,8 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallPrinter.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/DOTGraphTraitsPass.h"
 
 using namespace llvm;
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 9862c3c9c270..2093f0fdec12 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -16,11 +16,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/OrderedBasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index bdffdd8eb270..e4d9292db92d 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index a906770dbb34..0f5ec3f5626e 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -22,8 +22,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/config.h"
@@ -1015,9 +1015,11 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
   case Instruction::ICmp:
   case Instruction::FCmp: llvm_unreachable("Invalid for compares");
   case Instruction::Call:
-    if (auto *F = dyn_cast<Function>(Ops.back()))
-      if (canConstantFoldCallTo(F))
-        return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
+    if (auto *F = dyn_cast<Function>(Ops.back())) {
+      ImmutableCallSite CS(cast<CallInst>(InstOrCE));
+      if (canConstantFoldCallTo(CS, F))
+        return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI);
+    }
     return nullptr;
   case Instruction::Select:
     return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
@@ -1356,7 +1358,9 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
 //  Constant Folding for Calls
 //
 
-bool llvm::canConstantFoldCallTo(const Function *F) {
+bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
+  if (CS.isNoBuiltin())
+    return false;
   switch (F->getIntrinsicID()) {
   case Intrinsic::fabs:
   case Intrinsic::minnum:
@@ -1584,6 +1588,9 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
       // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN
       if (IntrinsicID == Intrinsic::cos)
         return Constant::getNullValue(Ty);
+      if (IntrinsicID == Intrinsic::bswap ||
+          IntrinsicID == Intrinsic::bitreverse)
+        return Operands[0];
     }
     if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
       if (IntrinsicID == Intrinsic::convert_to_fp16) {
@@ -1815,7 +1822,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
                 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
           return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                              /*roundTowardZero=*/false, Ty);
-        LLVM_FALLTHROUGH;
+        break;
       case Intrinsic::x86_sse_cvttss2si:
       case Intrinsic::x86_sse_cvttss2si64:
       case Intrinsic::x86_sse2_cvttsd2si:
@@ -1824,16 +1831,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
                 dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
           return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
                                              /*roundTowardZero=*/true, Ty);
+        break;
       }
     }
 
-    if (isa<UndefValue>(Operands[0])) {
-      if (IntrinsicID == Intrinsic::bswap ||
-          IntrinsicID == Intrinsic::bitreverse)
-        return Operands[0];
-      return nullptr;
-    }
-
     return nullptr;
   }
 
@@ -2034,6 +2035,14 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
   for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
     for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
+      // These intrinsics use a scalar type for their second argument.
+      if (J == 1 &&
+          (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz ||
+           IntrinsicID == Intrinsic::powi)) {
+        Lane[J] = Operands[J];
+        continue;
+      }
+
       Constant *Agg = Operands[J]->getAggregateElement(I);
       if (!Agg)
         return nullptr;
@@ -2054,8 +2063,11 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
 } // end anonymous namespace
 
 Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
+                       ArrayRef<Constant *> Operands,
                        const TargetLibraryInfo *TLI) {
+  if (CS.isNoBuiltin())
+    return nullptr;
   if (!F->hasName())
     return nullptr;
   StringRef Name = F->getName();
@@ -2072,6 +2084,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
 bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
   // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
   // (and to some extent ConstantFoldScalarCall).
+  if (CS.isNoBuiltin())
+    return false;
   Function *F = CS.getCalledFunction();
   if (!F)
     return false;
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 33f00cb19b26..4ef023379bb6 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -475,7 +475,9 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
     const std::vector<CallGraphNode *> &SCC = *I;
     assert(!SCC.empty() && "SCC with no functions?");
 
-    if (!SCC[0]->getFunction() || !SCC[0]->getFunction()->isDefinitionExact()) {
+    Function *F = SCC[0]->getFunction();
+
+    if (!F || !F->isDefinitionExact()) {
       // Calls externally or not exact - can't say anything useful. Remove any
       // existing function records (may have been created when scanning
       // globals).
@@ -484,19 +486,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
       continue;
     }
 
-    FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()];
+    FunctionInfo &FI = FunctionInfos[F];
     bool KnowNothing = false;
 
     // Collect the mod/ref properties due to called functions.  We only compute
     // one mod-ref set.
     for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
-      Function *F = SCC[i]->getFunction();
       if (!F) {
         KnowNothing = true;
         break;
       }
 
-      if (F->isDeclaration()) {
+      if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) {
         // Try to get mod/ref behaviour from function attributes.
         if (F->doesNotAccessMemory()) {
           // Can't do better than that!
@@ -545,6 +546,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
     for (auto *Node : SCC) {
       if (FI.getModRefInfo() == MRI_ModRef)
         break; // The mod/ref lattice saturates here.
+
+      // Don't prove any properties based on the implementation of an optnone
+      // function. Function attributes were already used as a best approximation
+      // above.
+      if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone))
+        continue;
+
       for (Instruction &I : instructions(Node->getFunction())) {
         if (FI.getModRefInfo() == MRI_ModRef)
           break; // The mod/ref lattice saturates here.
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 77c87928728a..6ff5938a3175 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -869,7 +869,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
   // because we have to continually rebuild the argument list even when no
   // simplifications can be performed. Until that is fixed with remapping
   // inside of instsimplify, directly constant fold calls here.
-  if (!canConstantFoldCallTo(F))
+  if (!canConstantFoldCallTo(CS, F))
     return false;
 
   // Try to re-map the arguments to constants.
@@ -885,7 +885,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
 
     ConstantArgs.push_back(C);
   }
-  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
+  if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
     SimplifiedValues[CS.getInstruction()] = C;
     return true;
   }
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index de2b9c0c56db..27c6b580e7ac 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/Pass.h"
@@ -33,7 +33,6 @@ STATISTIC(TotalMemInst, "Number of memory instructions");
 
 #include "llvm/IR/Instruction.def"
 
-
 namespace {
   class InstCount : public FunctionPass, public InstVisitor<InstCount> {
     friend class InstVisitor<InstCount>;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 66ac847455cd..a975be79619b 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2391,7 +2391,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
   const APInt *C;
   switch (BO.getOpcode()) {
   case Instruction::Add:
-    if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) {
+    if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
       // FIXME: If we have both nuw and nsw, we should reduce the range further.
       if (BO.hasNoUnsignedWrap()) {
         // 'add nuw x, C' produces [C, UINT_MAX].
@@ -2429,7 +2429,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
       Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
     } else if (match(BO.getOperand(0), m_APInt(C))) {
       unsigned ShiftAmount = Width - 1;
-      if (*C != 0 && BO.isExact())
+      if (!C->isNullValue() && BO.isExact())
         ShiftAmount = C->countTrailingZeros();
       if (C->isNegative()) {
         // 'ashr C, x' produces [C, C >> (Width-1)]
@@ -2450,7 +2450,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
     } else if (match(BO.getOperand(0), m_APInt(C))) {
       // 'lshr C, x' produces [C >> (Width-1), C].
       unsigned ShiftAmount = Width - 1;
-      if (*C != 0 && BO.isExact())
+      if (!C->isNullValue() && BO.isExact())
         ShiftAmount = C->countTrailingZeros();
       Lower = C->lshr(ShiftAmount);
       Upper = *C + 1;
@@ -2512,7 +2512,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
     break;
 
   case Instruction::UDiv:
-    if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) {
+    if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
       // 'udiv x, C' produces [0, UINT_MAX / C].
       Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
     } else if (match(BO.getOperand(0), m_APInt(C))) {
@@ -2827,14 +2827,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
         // - CI2 is one
         // - CI isn't zero
         if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
-            *CI2Val == 1 || !CI->isZero()) {
+            CI2Val->isOneValue() || !CI->isZero()) {
           if (Pred == ICmpInst::ICMP_EQ)
             return ConstantInt::getFalse(RHS->getContext());
           if (Pred == ICmpInst::ICMP_NE)
             return ConstantInt::getTrue(RHS->getContext());
         }
       }
-      if (CIVal->isSignMask() && *CI2Val == 1) {
+      if (CIVal->isSignMask() && CI2Val->isOneValue()) {
         if (Pred == ICmpInst::ICMP_UGT)
           return ConstantInt::getFalse(RHS->getContext());
         if (Pred == ICmpInst::ICMP_ULE)
@@ -3308,11 +3308,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   }
 
   // icmp eq|ne X, Y -> false|true if X != Y
-  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+  if (ICmpInst::isEquality(Pred) &&
       isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
-    LLVMContext &Ctx = LHS->getType()->getContext();
-    return Pred == ICmpInst::ICMP_NE ?
-      ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+    return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy);
   }
 
   if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse))
@@ -3360,19 +3358,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
   }
 
-  // If a bit is known to be zero for A and known to be one for B,
-  // then A and B cannot be equal.
-  if (ICmpInst::isEquality(Pred)) {
-    const APInt *RHSVal;
-    if (match(RHS, m_APInt(RHSVal))) {
-      KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
-      if (LHSKnown.Zero.intersects(*RHSVal) ||
-          !LHSKnown.One.isSubsetOf(*RHSVal))
-        return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy)
-                                         : ConstantInt::getTrue(ITy);
-    }
-  }
-
   // If the comparison is with the result of a select instruction, check whether
   // comparing with either branch of the select always yields the same value.
   if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
@@ -3896,12 +3881,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
   }
 
   // Check to see if this is constant foldable.
-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-    if (!isa<Constant>(Ops[i]))
-      return nullptr;
+  if (!all_of(Ops, [](Value *V) { return isa<Constant>(V); }))
+    return nullptr;
 
-  return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
-                                        Ops.slice(1));
+  auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
+                                            Ops.slice(1));
+  if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL))
+    return CEFolded;
+  return CE;
 }
 
 Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
@@ -4486,8 +4473,9 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
 }
 
 template <typename IterTy>
-static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
-                           const SimplifyQuery &Q, unsigned MaxRecurse) {
+static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin,
+                           IterTy ArgEnd, const SimplifyQuery &Q,
+                           unsigned MaxRecurse) {
   Type *Ty = V->getType();
   if (PointerType *PTy = dyn_cast<PointerType>(Ty))
     Ty = PTy->getElementType();
@@ -4506,7 +4494,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
     if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse))
       return Ret;
 
-  if (!canConstantFoldCallTo(F))
+  if (!canConstantFoldCallTo(CS, F))
     return nullptr;
 
   SmallVector<Constant *, 4> ConstantArgs;
@@ -4518,17 +4506,18 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
     ConstantArgs.push_back(C);
   }
 
-  return ConstantFoldCall(F, ConstantArgs, Q.TLI);
+  return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI);
 }
 
-Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
-                          User::op_iterator ArgEnd, const SimplifyQuery &Q) {
-  return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
+Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
+                          User::op_iterator ArgBegin, User::op_iterator ArgEnd,
                           const SimplifyQuery &Q) {
-  return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit);
+  return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit);
+}
+
+Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
+                          ArrayRef<Value *> Args, const SimplifyQuery &Q) {
+  return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
 }
 
 /// See if we can compute a simplified version of this instruction.
@@ -4659,7 +4648,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
     break;
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
-    Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), Q);
+    Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+                          Q);
     break;
   }
 #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
index 15c757b48f76..8a87b980b0a8 100644
--- a/lib/Analysis/LLVMBuild.txt
+++ b/lib/Analysis/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = Analysis
 parent = Libraries
-required_libraries = Core Support ProfileData Object
+required_libraries = BinaryFormat Core Object ProfileData Support
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
index b51c6beb7959..e2884d0a4564 100644
--- a/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -16,6 +16,7 @@
 
 #include "llvm/Analysis/LazyBranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 
 using namespace llvm;
 
@@ -24,6 +25,7 @@ using namespace llvm;
 INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
                       "Lazy Branch Probability Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
                     "Lazy Branch Probability Analysis", true, true)
 
@@ -41,6 +43,7 @@ void LazyBranchProbabilityInfoPass::print(raw_ostream &OS,
 
 void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
   AU.setPreservesAll();
 }
 
@@ -48,16 +51,19 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); }
 
 bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
   LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI);
+  TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI);
   return false;
 }
 
 void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) {
   AU.addRequired<LazyBranchProbabilityInfoPass>();
   AU.addRequired<LoopInfoWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
 }
 
 void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) {
   INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass);
   INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass);
 }
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index eef56815f2e0..b6a9436cc1ec 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -8,10 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/Instructions.h"
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 6a9ae6440ace..3ed61a79478a 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -302,7 +302,7 @@ static bool hasSingleValue(const LVILatticeVal &Val) {
 ///   contradictory.  If this happens, we return some valid lattice value so as
 ///   not confuse the rest of LVI.  Ideally, we'd always return Undefined, but
 ///   we do not make this guarantee.  TODO: This would be a useful enhancement.
-static LVILatticeVal intersect(LVILatticeVal A, LVILatticeVal B) {
+static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) {
   // Undefined is the strongest state.  It means the value is known to be along
   // an unreachable path.
   if (A.isUndefined())
@@ -364,7 +364,6 @@ namespace {
   /// This is the cache kept by LazyValueInfo which
   /// maintains information about queries across the clients' queries.
   class LazyValueInfoCache {
-    friend class LazyValueInfoAnnotatedWriter;
     /// This is all of the cached block information for exactly one Value*.
     /// The entries are sorted by the BasicBlock* of the
     /// entries, allowing us to do a lookup with a binary search.
@@ -384,7 +383,6 @@ namespace {
     /// don't spend time removing unused blocks from our caches.
     DenseSet<PoisoningVH<BasicBlock> > SeenBlocks;
 
-  protected:
     /// This is all of the cached information for all values,
     /// mapped from Value* to key information.
     DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
@@ -443,7 +441,6 @@ namespace {
       return BBI->second;
     }
 
-    void printCache(Function &F, raw_ostream &OS);
     /// clear - Empty the cache.
     void clear() {
       SeenBlocks.clear();
@@ -467,61 +464,6 @@ namespace {
   };
 }
 
-
-namespace {
-
-  /// An assembly annotator class to print LazyValueCache information in
-  /// comments.
-  class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter {
-    const LazyValueInfoCache* LVICache;
-
-  public:
-    LazyValueInfoAnnotatedWriter(const LazyValueInfoCache *L) : LVICache(L) {}
-
-    virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
-                                          formatted_raw_ostream &OS) {
-      auto ODI = LVICache->OverDefinedCache.find(const_cast<BasicBlock*>(BB));
-      if (ODI == LVICache->OverDefinedCache.end())
-        return;
-      OS << "; OverDefined values for block are: \n";
-      for (auto *V : ODI->second)
-        OS << ";" << *V << "\n";
-
-      // Find if there are latticevalues defined for arguments of the function.
-      auto *F = const_cast<Function *>(BB->getParent());
-      for (auto &Arg : F->args()) {
-        auto VI = LVICache->ValueCache.find_as(&Arg);
-        if (VI == LVICache->ValueCache.end())
-          continue;
-        auto BBI = VI->second->BlockVals.find(const_cast<BasicBlock *>(BB));
-        if (BBI != VI->second->BlockVals.end())
-          OS << "; CachedLatticeValue for: '" << *VI->first << "' is: '"
-             << BBI->second << "'\n";
-      }
-    }
-
-    virtual void emitInstructionAnnot(const Instruction *I,
-                                      formatted_raw_ostream &OS) {
-
-      auto VI = LVICache->ValueCache.find_as(const_cast<Instruction *>(I));
-      if (VI == LVICache->ValueCache.end())
-        return;
-      OS << "; CachedLatticeValues for: '" << *VI->first << "'\n";
-      for (auto &BV : VI->second->BlockVals) {
-        OS << "; at beginning of BasicBlock: '";
-        BV.first->printAsOperand(OS, false);
-        OS << "' LatticeVal: '" << BV.second << "' \n";
-      }
-    }
-};
-}
-
-void LazyValueInfoCache::printCache(Function &F, raw_ostream &OS) {
-  LazyValueInfoAnnotatedWriter Writer(this);
-  F.print(OS, &Writer);
-
-}
-
 void LazyValueInfoCache::eraseValue(Value *V) {
   for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) {
     // Copy and increment the iterator immediately so we can erase behind
@@ -615,6 +557,30 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc,
   }
 }
 
+
+namespace {
+/// An assembly annotator class to print LazyValueCache information in
+/// comments.
+class LazyValueInfoImpl;
+class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter {
+  LazyValueInfoImpl *LVIImpl;
+  // While analyzing which blocks we can solve values for, we need the dominator
+  // information. Since this is an optional parameter in LVI, we require this
+  // DomTreeAnalysis pass in the printer pass, and pass the dominator
+  // tree to the LazyValueInfoAnnotatedWriter.
+  DominatorTree &DT;
+
+public:
+  LazyValueInfoAnnotatedWriter(LazyValueInfoImpl *L, DominatorTree &DTree)
+      : LVIImpl(L), DT(DTree) {}
+
+  virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+                                        formatted_raw_ostream &OS);
+
+  virtual void emitInstructionAnnot(const Instruction *I,
+                                    formatted_raw_ostream &OS);
+};
+}
 namespace {
   // The actual implementation of the lazy analysis and update.  Note that the
   // inheritance from LazyValueInfoCache is intended to be temporary while
@@ -693,9 +659,10 @@ namespace {
       TheCache.clear();
     }
 
-    /// Printing the LazyValueInfoCache.
-    void printCache(Function &F, raw_ostream &OS) {
-       TheCache.printCache(F, OS);
+    /// Printing the LazyValueInfo Analysis.
+    void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) {
+        LazyValueInfoAnnotatedWriter Writer(this, DTree);
+        F.print(OS, &Writer);
     }
 
     /// This is part of the update interface to inform the cache
@@ -714,6 +681,7 @@ namespace {
   };
 } // end anonymous namespace
 
+
 void LazyValueInfoImpl::solve() {
   SmallVector<std::pair<BasicBlock *, Value *>, 8> StartingStack(
       BlockValueStack.begin(), BlockValueStack.end());
@@ -838,7 +806,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
   // that for all other pointer typed values, we terminate the search at the
   // definition.  We could easily extend this to look through geps, bitcasts,
   // and the like to prove non-nullness, but it's not clear that's worth it
-  // compile time wise.  The context-insensative value walk done inside
+  // compile time wise.  The context-insensitive value walk done inside
   // isKnownNonNull gets most of the profitable cases at much less expense.
   // This does mean that we have a sensativity to where the defining
   // instruction is placed, even if it could legally be hoisted much higher.
@@ -1693,63 +1661,62 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
 }
 
 static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
-                                                  LVILatticeVal &Result,
+                                                  const LVILatticeVal &Val,
                                                   const DataLayout &DL,
                                                   TargetLibraryInfo *TLI) {
 
   // If we know the value is a constant, evaluate the conditional.
   Constant *Res = nullptr;
-  if (Result.isConstant()) {
-    Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
-                                          TLI);
+  if (Val.isConstant()) {
+    Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI);
     if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
       return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
     return LazyValueInfo::Unknown;
   }
 
-  if (Result.isConstantRange()) {
+  if (Val.isConstantRange()) {
     ConstantInt *CI = dyn_cast<ConstantInt>(C);
     if (!CI) return LazyValueInfo::Unknown;
 
-    const ConstantRange &CR = Result.getConstantRange();
+    const ConstantRange &CR = Val.getConstantRange();
     if (Pred == ICmpInst::ICMP_EQ) {
       if (!CR.contains(CI->getValue()))
         return LazyValueInfo::False;
 
-      if (CR.isSingleElement() && CR.contains(CI->getValue()))
+      if (CR.isSingleElement())
         return LazyValueInfo::True;
     } else if (Pred == ICmpInst::ICMP_NE) {
       if (!CR.contains(CI->getValue()))
         return LazyValueInfo::True;
 
-      if (CR.isSingleElement() && CR.contains(CI->getValue()))
+      if (CR.isSingleElement())
+        return LazyValueInfo::False;
+    } else {
+      // Handle more complex predicates.
+      ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
+          (ICmpInst::Predicate)Pred, CI->getValue());
+      if (TrueValues.contains(CR))
+        return LazyValueInfo::True;
+      if (TrueValues.inverse().contains(CR))
         return LazyValueInfo::False;
     }
-
-    // Handle more complex predicates.
-    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
-        (ICmpInst::Predicate)Pred, CI->getValue());
-    if (TrueValues.contains(CR))
-      return LazyValueInfo::True;
-    if (TrueValues.inverse().contains(CR))
-      return LazyValueInfo::False;
     return LazyValueInfo::Unknown;
   }
 
-  if (Result.isNotConstant()) {
+  if (Val.isNotConstant()) {
     // If this is an equality comparison, we can try to fold it knowing that
     // "V != C1".
     if (Pred == ICmpInst::ICMP_EQ) {
       // !C1 == C -> false iff C1 == C.
       Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
-                                            Result.getNotConstant(), C, DL,
+                                            Val.getNotConstant(), C, DL,
                                             TLI);
       if (Res->isNullValue())
         return LazyValueInfo::False;
     } else if (Pred == ICmpInst::ICMP_NE) {
       // !C1 != C -> true iff C1 == C.
       Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
-                                            Result.getNotConstant(), C, DL,
+                                            Val.getNotConstant(), C, DL,
                                             TLI);
       if (Res->isNullValue())
         return LazyValueInfo::True;
@@ -1890,12 +1857,65 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) {
 }
 
 
-void LazyValueInfo::printCache(Function &F, raw_ostream &OS) {
+void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) {
   if (PImpl) {
-    getImpl(PImpl, AC, DL, DT).printCache(F, OS);
+    getImpl(PImpl, AC, DL, DT).printLVI(F, DTree, OS);
   }
 }
 
+// Print the LVI for the function arguments at the start of each basic block.
+void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot(
+    const BasicBlock *BB, formatted_raw_ostream &OS) {
+  // Find if there are lattice values defined for arguments of the function.
+  auto *F = BB->getParent();
+  for (auto &Arg : F->args()) {
+    LVILatticeVal Result = LVIImpl->getValueInBlock(
+        const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB));
+    if (Result.isUndefined())
+      continue;
+    OS << "; LatticeVal for: '" << Arg << "' is: " << Result << "\n";
+  }
+}
+
+// This function prints the LVI analysis for the instruction I at the beginning
+// of various basic blocks. It relies on calculated values that are stored in
+// the LazyValueInfoCache, and in the absence of cached values, recalculates
+// the LazyValueInfo for `I`, and prints that info.
+void LazyValueInfoAnnotatedWriter::emitInstructionAnnot(
+    const Instruction *I, formatted_raw_ostream &OS) {
+
+  auto *ParentBB = I->getParent();
+  SmallPtrSet<const BasicBlock*, 16> BlocksContainingLVI;
+  // We can generate (solve) LVI values only for blocks that are dominated by
+  // I's parent. However, to avoid generating LVI for all dominated blocks,
+  // which contain redundant/uninteresting information, we print LVI only for
+  // blocks that may use this LVI information (such as immediate successor
+  // blocks, and blocks that contain uses of `I`).
+  auto printResult = [&](const BasicBlock *BB) {
+    if (!BlocksContainingLVI.insert(BB).second)
+      return;
+    LVILatticeVal Result = LVIImpl->getValueInBlock(
+        const_cast<Instruction *>(I), const_cast<BasicBlock *>(BB));
+      OS << "; LatticeVal for: '" << *I << "' in BB: '";
+      BB->printAsOperand(OS, false);
+      OS << "' is: " << Result << "\n";
+  };
+
+  printResult(ParentBB);
+  // Print the LVI analysis results for the immediate successor blocks that
+  // are dominated by `ParentBB`.
+  for (auto *BBSucc : successors(ParentBB))
+    if (DT.dominates(ParentBB, BBSucc))
+      printResult(BBSucc);
+
+  // Print LVI in blocks where `I` is used.
+  for (auto *U : I->users())
+    if (auto *UseI = dyn_cast<Instruction>(U))
+      if (!isa<PHINode>(UseI) || DT.dominates(ParentBB, UseI->getParent()))
+        printResult(UseI->getParent());
+
+}
+
 namespace {
 // Printer class for LazyValueInfo results.
 class LazyValueInfoPrinter : public FunctionPass {
@@ -1908,12 +1928,16 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
     AU.addRequired<LazyValueInfoWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
   }
 
+  // Get the mandatory dominator tree analysis and pass this in to the
+  // LVIPrinter. We cannot rely on the LVI's DT, since it's optional.
   bool runOnFunction(Function &F) override {
     dbgs() << "LVI for function '" << F.getName() << "':\n";
     auto &LVI = getAnalysis<LazyValueInfoWrapperPass>().getLVI();
-    LVI.printCache(F, dbgs());
+    auto &DTree = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    LVI.printLVI(F, DTree, dbgs());
     return false;
   }
 };
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index e6391792bc23..9713588537b3 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -58,13 +58,13 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index e7a85ae06e68..5c0cbb26484c 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -10,9 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LLVMContext.h"
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index fa0cc5a46c2b..4231a78352ce 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstIterator.h"
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 188885063b39..3fdedbb0ab3c 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -15,17 +15,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/OrderedBasicBlock.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index da5c79ab6c81..1ff84471c094 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -24,7 +25,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Analysis/MemorySSA.h"
 #include <algorithm>
 
 #define DEBUG_TYPE "memoryssa"
@@ -124,17 +124,12 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
         return &*Iter;
     } else {
       // Otherwise, have to walk the all access iterator.
-      auto Iter = MA->getReverseIterator();
-      ++Iter;
-      while (&*Iter != &*Defs->begin()) {
-        if (!isa<MemoryUse>(*Iter))
-          return &*Iter;
-        --Iter;
-      }
-      // At this point it must be pointing at firstdef
-      assert(&*Iter == &*Defs->begin() &&
-             "Should have hit first def walking backwards");
-      return &*Iter;
+      auto End = MSSA->getWritableBlockAccesses(MA->getBlock())->rend();
+      for (auto &U : make_range(++MA->getReverseIterator(), End))
+        if (!isa<MemoryUse>(U))
+          return cast<MemoryAccess>(&U);
+      // Note that if MA comes before Defs->begin(), we won't hit a def.
+      return nullptr;
     }
   }
   return nullptr;
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f675830aa67d..e12cdf9182c7 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -15,8 +15,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Pass.h"
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 3253f27c010d..095647e1bd20 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -447,6 +447,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
         });
   }
 
+  bool IsThinLTO = true;
+  if (auto *MD =
+          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
+    IsThinLTO = MD->getZExtValue();
+
   for (auto &GlobalList : Index) {
     // Ignore entries for references that are undefined in the current module.
     if (GlobalList.second.SummaryList.empty())
@@ -455,6 +460,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
     assert(GlobalList.second.SummaryList.size() == 1 &&
            "Expected module's index to have one summary per GUID");
     auto &Summary = GlobalList.second.SummaryList[0];
+    if (!IsThinLTO) {
+      Summary->setNotEligibleToImport();
+      continue;
+    }
+
     bool AllRefsCanBeExternallyReferenced =
         llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
           return !CantBePromoted.count(VI.getGUID());
diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp
index 1e75c0824d03..f374dd33f86f 100644
--- a/lib/Analysis/ObjCARCInstKind.cpp
+++ b/lib/Analysis/ObjCARCInstKind.cpp
@@ -20,8 +20,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ObjCARCInstKind.h"
-#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
 #include "llvm/IR/Intrinsics.h"
 
 using namespace llvm;
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 30a4e011060e..5986b8c4e0c3 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -9,14 +9,14 @@
 // Print out the region tree of a function using dotty/graphviz.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/RegionPrinter.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Analysis/RegionPrinter.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index d96697cafbe9..b9c4716b5528 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -91,8 +91,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -6793,7 +6793,7 @@ static bool CanConstantFold(const Instruction *I) {
 
   if (const CallInst *CI = dyn_cast<CallInst>(I))
     if (const Function *F = CI->getCalledFunction())
-      return canConstantFoldCallTo(F);
+      return canConstantFoldCallTo(CI, F);
   return false;
 }
 
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 54c44c8e542d..3740039b8f86 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ScalarEvolutionNormalization.h"
 using namespace llvm;
 
 /// TransformKind - Different types of transformations that
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index ac646716476b..488cb332a0b0 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -133,6 +133,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                         Scale, AddrSpace);
 }
 
+bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
+  return TTIImpl->isLSRCostLess(C1, C2);
+}
+
 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
   return TTIImpl->isLegalMaskedStore(DataType);
 }
@@ -464,6 +468,10 @@ bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
   return TTIImpl->getTgtMemIntrinsic(Inst, Info);
 }
 
+unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
+  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
+}
+
 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
     IntrinsicInst *Inst, Type *ExpectedType) const {
   return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index a5dceb6c2271..c0181662fd9d 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -17,9 +17,9 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/CallSite.h"
@@ -1982,7 +1982,7 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
 
 /// Return true if it is known that V1 != V2.
 static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
-  if (V1->getType()->isVectorTy() || V1 == V2)
+  if (V1 == V2)
     return false;
   if (V1->getType() != V2->getType())
     // We can't look through casts yet.
@@ -1990,18 +1990,14 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
   if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q))
     return true;
 
-  if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
+  if (V1->getType()->isIntOrIntVectorTy()) {
     // Are any known bits in V1 contradictory to known bits in V2? If V1
     // has a known zero where V2 has a known one, they must not be equal.
-    auto BitWidth = Ty->getBitWidth();
-    KnownBits Known1(BitWidth);
-    computeKnownBits(V1, Known1, 0, Q);
-    KnownBits Known2(BitWidth);
-    computeKnownBits(V2, Known2, 0, Q);
+    KnownBits Known1 = computeKnownBits(V1, 0, Q);
+    KnownBits Known2 = computeKnownBits(V2, 0, Q);
 
-    APInt OppositeBits = (Known1.Zero & Known2.One) |
-                         (Known2.Zero & Known1.One);
-    if (OppositeBits.getBoolValue())
+    if (Known1.Zero.intersects(Known2.One) ||
+        Known2.Zero.intersects(Known1.One))
       return true;
   }
   return false;
@@ -3082,7 +3078,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
       Str = StringRef("", 1);
       return true;
     }
-    // We cannot instantiate a StringRef as we do not have an apropriate string
+    // We cannot instantiate a StringRef as we do not have an appropriate string
     // of 0s at hand.
     return false;
   }
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 2d2249da4e13..0ace8fa382bc 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -11,19 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/ADT/EquivalenceClasses.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Value.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/IRBuilder.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index ff1ea44a18a7..9ad31125f4b8 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -15,9 +15,10 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/BasicBlock.h"
@@ -41,7 +42,6 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SaveAndRestore.h"
diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt
index 3bc31ed910a7..82dba8c15bb8 100644
--- a/lib/AsmParser/LLVMBuild.txt
+++ b/lib/AsmParser/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = AsmParser
 parent = Libraries
-required_libraries = Core Support
+required_libraries = BinaryFormat Core Support
diff --git a/lib/BinaryFormat/CMakeLists.txt b/lib/BinaryFormat/CMakeLists.txt
new file mode 100644
index 000000000000..cb78ea6fdf92
--- /dev/null
+++ b/lib/BinaryFormat/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMBinaryFormat
+  Dwarf.cpp
+  Magic.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat
+  )
+  
\ No newline at end of file
diff --git a/lib/Support/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp
similarity index 77%
rename from lib/Support/Dwarf.cpp
rename to lib/BinaryFormat/Dwarf.cpp
index 200546857de7..37c4579ef0f8 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/BinaryFormat/Dwarf.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===//
+//===-- llvm/BinaryFormat/Dwarf.cpp - Dwarf Framework ------------*- C++-*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Dwarf.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/ErrorHandling.h"
 
@@ -25,15 +25,15 @@ StringRef llvm::dwarf::TagString(unsigned Tag) {
 #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR)                               \
   case DW_TAG_##NAME:                                                          \
     return "DW_TAG_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
 unsigned llvm::dwarf::getTag(StringRef TagString) {
   return StringSwitch<unsigned>(TagString)
 #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR)                               \
-      .Case("DW_TAG_" #NAME, DW_TAG_##NAME)
-#include "llvm/Support/Dwarf.def"
+  .Case("DW_TAG_" #NAME, DW_TAG_##NAME)
+#include "llvm/BinaryFormat/Dwarf.def"
       .Default(DW_TAG_invalid);
 }
 
@@ -44,7 +44,7 @@ unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) {
 #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR)                               \
   case DW_TAG_##NAME:                                                          \
     return VERSION;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -55,14 +55,16 @@ unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) {
 #define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR)                               \
   case DW_TAG_##NAME:                                                          \
     return DWARF_VENDOR_##VENDOR;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
 StringRef llvm::dwarf::ChildrenString(unsigned Children) {
   switch (Children) {
-  case DW_CHILDREN_no:                   return "DW_CHILDREN_no";
-  case DW_CHILDREN_yes:                  return "DW_CHILDREN_yes";
+  case DW_CHILDREN_no:
+    return "DW_CHILDREN_no";
+  case DW_CHILDREN_yes:
+    return "DW_CHILDREN_yes";
   }
   return StringRef();
 }
@@ -74,7 +76,7 @@ StringRef llvm::dwarf::AttributeString(unsigned Attribute) {
 #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR)                                \
   case DW_AT_##NAME:                                                           \
     return "DW_AT_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -85,7 +87,7 @@ unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) {
 #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR)                                \
   case DW_AT_##NAME:                                                           \
     return VERSION;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -96,7 +98,7 @@ unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) {
 #define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR)                                \
   case DW_AT_##NAME:                                                           \
     return DWARF_VENDOR_##VENDOR;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -107,7 +109,7 @@ StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) {
 #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR)                              \
   case DW_FORM_##NAME:                                                         \
     return "DW_FORM_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -118,7 +120,7 @@ unsigned llvm::dwarf::FormVersion(dwarf::Form Form) {
 #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR)                              \
   case DW_FORM_##NAME:                                                         \
     return VERSION;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -129,7 +131,7 @@ unsigned llvm::dwarf::FormVendor(dwarf::Form Form) {
 #define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR)                              \
   case DW_FORM_##NAME:                                                         \
     return DWARF_VENDOR_##VENDOR;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -140,7 +142,7 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
 #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)                                \
   case DW_OP_##NAME:                                                           \
     return "DW_OP_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   case DW_OP_LLVM_fragment:
     return "DW_OP_LLVM_fragment";
   }
@@ -149,8 +151,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
 unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
   return StringSwitch<unsigned>(OperationEncodingString)
 #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)                                \
-      .Case("DW_OP_" #NAME, DW_OP_##NAME)
-#include "llvm/Support/Dwarf.def"
+  .Case("DW_OP_" #NAME, DW_OP_##NAME)
+#include "llvm/BinaryFormat/Dwarf.def"
       .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
       .Default(0);
 }
@@ -162,7 +164,7 @@ unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) {
 #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)                                \
   case DW_OP_##NAME:                                                           \
     return VERSION;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -173,7 +175,7 @@ unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) {
 #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)                                \
   case DW_OP_##NAME:                                                           \
     return DWARF_VENDOR_##VENDOR;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -184,15 +186,15 @@ StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
 #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR)                               \
   case DW_ATE_##NAME:                                                          \
     return "DW_ATE_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
 unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) {
   return StringSwitch<unsigned>(EncodingString)
 #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR)                               \
-      .Case("DW_ATE_" #NAME, DW_ATE_##NAME)
-#include "llvm/Support/Dwarf.def"
+  .Case("DW_ATE_" #NAME, DW_ATE_##NAME)
+#include "llvm/BinaryFormat/Dwarf.def"
       .Default(0);
 }
 
@@ -203,7 +205,7 @@ unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) {
 #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR)                               \
   case DW_ATE_##NAME:                                                          \
     return VERSION;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -214,28 +216,38 @@ unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) {
 #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR)                               \
   case DW_ATE_##NAME:                                                          \
     return DWARF_VENDOR_##VENDOR;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
 StringRef llvm::dwarf::DecimalSignString(unsigned Sign) {
   switch (Sign) {
-  case DW_DS_unsigned:                   return "DW_DS_unsigned";
-  case DW_DS_leading_overpunch:          return "DW_DS_leading_overpunch";
-  case DW_DS_trailing_overpunch:         return "DW_DS_trailing_overpunch";
-  case DW_DS_leading_separate:           return "DW_DS_leading_separate";
-  case DW_DS_trailing_separate:          return "DW_DS_trailing_separate";
+  case DW_DS_unsigned:
+    return "DW_DS_unsigned";
+  case DW_DS_leading_overpunch:
+    return "DW_DS_leading_overpunch";
+  case DW_DS_trailing_overpunch:
+    return "DW_DS_trailing_overpunch";
+  case DW_DS_leading_separate:
+    return "DW_DS_leading_separate";
+  case DW_DS_trailing_separate:
+    return "DW_DS_trailing_separate";
   }
   return StringRef();
 }
 
 StringRef llvm::dwarf::EndianityString(unsigned Endian) {
   switch (Endian) {
-  case DW_END_default:                   return "DW_END_default";
-  case DW_END_big:                       return "DW_END_big";
-  case DW_END_little:                    return "DW_END_little";
-  case DW_END_lo_user:                   return "DW_END_lo_user";
-  case DW_END_hi_user:                   return "DW_END_hi_user";
+  case DW_END_default:
+    return "DW_END_default";
+  case DW_END_big:
+    return "DW_END_big";
+  case DW_END_little:
+    return "DW_END_little";
+  case DW_END_lo_user:
+    return "DW_END_lo_user";
+  case DW_END_hi_user:
+    return "DW_END_hi_user";
   }
   return StringRef();
 }
@@ -243,18 +255,24 @@ StringRef llvm::dwarf::EndianityString(unsigned Endian) {
 StringRef llvm::dwarf::AccessibilityString(unsigned Access) {
   switch (Access) {
   // Accessibility codes
-  case DW_ACCESS_public:                 return "DW_ACCESS_public";
-  case DW_ACCESS_protected:              return "DW_ACCESS_protected";
-  case DW_ACCESS_private:                return "DW_ACCESS_private";
+  case DW_ACCESS_public:
+    return "DW_ACCESS_public";
+  case DW_ACCESS_protected:
+    return "DW_ACCESS_protected";
+  case DW_ACCESS_private:
+    return "DW_ACCESS_private";
   }
   return StringRef();
 }
 
 StringRef llvm::dwarf::VisibilityString(unsigned Visibility) {
   switch (Visibility) {
-  case DW_VIS_local:                     return "DW_VIS_local";
-  case DW_VIS_exported:                  return "DW_VIS_exported";
-  case DW_VIS_qualified:                 return "DW_VIS_qualified";
+  case DW_VIS_local:
+    return "DW_VIS_local";
+  case DW_VIS_exported:
+    return "DW_VIS_exported";
+  case DW_VIS_qualified:
+    return "DW_VIS_qualified";
   }
   return StringRef();
 }
@@ -266,7 +284,7 @@ StringRef llvm::dwarf::VirtualityString(unsigned Virtuality) {
 #define HANDLE_DW_VIRTUALITY(ID, NAME)                                         \
   case DW_VIRTUALITY_##NAME:                                                   \
     return "DW_VIRTUALITY_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -274,7 +292,7 @@ unsigned llvm::dwarf::getVirtuality(StringRef VirtualityString) {
   return StringSwitch<unsigned>(VirtualityString)
 #define HANDLE_DW_VIRTUALITY(ID, NAME)                                         \
   .Case("DW_VIRTUALITY_" #NAME, DW_VIRTUALITY_##NAME)
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
       .Default(DW_VIRTUALITY_invalid);
 }
 
@@ -285,7 +303,7 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) {
 #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
   case DW_LANG_##NAME:                                                         \
     return "DW_LANG_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -293,7 +311,7 @@ unsigned llvm::dwarf::getLanguage(StringRef LanguageString) {
   return StringSwitch<unsigned>(LanguageString)
 #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
   .Case("DW_LANG_" #NAME, DW_LANG_##NAME)
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
       .Default(0);
 }
 
@@ -304,7 +322,7 @@ unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) {
 #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
   case DW_LANG_##NAME:                                                         \
     return VERSION;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -315,16 +333,20 @@ unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) {
 #define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR)                              \
   case DW_LANG_##NAME:                                                         \
     return DWARF_VENDOR_##VENDOR;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
 StringRef llvm::dwarf::CaseString(unsigned Case) {
   switch (Case) {
-  case DW_ID_case_sensitive:             return "DW_ID_case_sensitive";
-  case DW_ID_up_case:                    return "DW_ID_up_case";
-  case DW_ID_down_case:                  return "DW_ID_down_case";
-  case DW_ID_case_insensitive:           return "DW_ID_case_insensitive";
+  case DW_ID_case_sensitive:
+    return "DW_ID_case_sensitive";
+  case DW_ID_up_case:
+    return "DW_ID_up_case";
+  case DW_ID_down_case:
+    return "DW_ID_down_case";
+  case DW_ID_case_insensitive:
+    return "DW_ID_case_insensitive";
   }
   return StringRef();
 }
@@ -333,42 +355,50 @@ StringRef llvm::dwarf::ConventionString(unsigned CC) {
   switch (CC) {
   default:
     return StringRef();
-#define HANDLE_DW_CC(ID, NAME)                                               \
-  case DW_CC_##NAME:                                                         \
+#define HANDLE_DW_CC(ID, NAME)                                                 \
+  case DW_CC_##NAME:                                                           \
     return "DW_CC_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
 unsigned llvm::dwarf::getCallingConvention(StringRef CCString) {
   return StringSwitch<unsigned>(CCString)
 #define HANDLE_DW_CC(ID, NAME) .Case("DW_CC_" #NAME, DW_CC_##NAME)
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
       .Default(0);
 }
 
 StringRef llvm::dwarf::InlineCodeString(unsigned Code) {
   switch (Code) {
-  case DW_INL_not_inlined:               return "DW_INL_not_inlined";
-  case DW_INL_inlined:                   return "DW_INL_inlined";
-  case DW_INL_declared_not_inlined:      return "DW_INL_declared_not_inlined";
-  case DW_INL_declared_inlined:          return "DW_INL_declared_inlined";
+  case DW_INL_not_inlined:
+    return "DW_INL_not_inlined";
+  case DW_INL_inlined:
+    return "DW_INL_inlined";
+  case DW_INL_declared_not_inlined:
+    return "DW_INL_declared_not_inlined";
+  case DW_INL_declared_inlined:
+    return "DW_INL_declared_inlined";
   }
   return StringRef();
 }
 
 StringRef llvm::dwarf::ArrayOrderString(unsigned Order) {
   switch (Order) {
-  case DW_ORD_row_major:                 return "DW_ORD_row_major";
-  case DW_ORD_col_major:                 return "DW_ORD_col_major";
+  case DW_ORD_row_major:
+    return "DW_ORD_row_major";
+  case DW_ORD_col_major:
+    return "DW_ORD_col_major";
   }
   return StringRef();
 }
 
 StringRef llvm::dwarf::DiscriminantString(unsigned Discriminant) {
   switch (Discriminant) {
-  case DW_DSC_label:                     return "DW_DSC_label";
-  case DW_DSC_range:                     return "DW_DSC_range";
+  case DW_DSC_label:
+    return "DW_DSC_label";
+  case DW_DSC_range:
+    return "DW_DSC_range";
   }
   return StringRef();
 }
@@ -377,10 +407,10 @@ StringRef llvm::dwarf::LNStandardString(unsigned Standard) {
   switch (Standard) {
   default:
     return StringRef();
-#define HANDLE_DW_LNS(ID, NAME)                                               \
-  case DW_LNS_##NAME:                                                         \
+#define HANDLE_DW_LNS(ID, NAME)                                                \
+  case DW_LNS_##NAME:                                                          \
     return "DW_LNS_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -388,22 +418,28 @@ StringRef llvm::dwarf::LNExtendedString(unsigned Encoding) {
   switch (Encoding) {
   default:
     return StringRef();
-#define HANDLE_DW_LNE(ID, NAME)                                               \
-  case DW_LNE_##NAME:                                                         \
+#define HANDLE_DW_LNE(ID, NAME)                                                \
+  case DW_LNE_##NAME:                                                          \
     return "DW_LNE_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
 StringRef llvm::dwarf::MacinfoString(unsigned Encoding) {
   switch (Encoding) {
   // Macinfo Type Encodings
-  case DW_MACINFO_define:                return "DW_MACINFO_define";
-  case DW_MACINFO_undef:                 return "DW_MACINFO_undef";
-  case DW_MACINFO_start_file:            return "DW_MACINFO_start_file";
-  case DW_MACINFO_end_file:              return "DW_MACINFO_end_file";
-  case DW_MACINFO_vendor_ext:            return "DW_MACINFO_vendor_ext";
-  case DW_MACINFO_invalid:               return "DW_MACINFO_invalid";
+  case DW_MACINFO_define:
+    return "DW_MACINFO_define";
+  case DW_MACINFO_undef:
+    return "DW_MACINFO_undef";
+  case DW_MACINFO_start_file:
+    return "DW_MACINFO_start_file";
+  case DW_MACINFO_end_file:
+    return "DW_MACINFO_end_file";
+  case DW_MACINFO_vendor_ext:
+    return "DW_MACINFO_vendor_ext";
+  case DW_MACINFO_invalid:
+    return "DW_MACINFO_invalid";
   }
   return StringRef();
 }
@@ -422,10 +458,10 @@ StringRef llvm::dwarf::CallFrameString(unsigned Encoding) {
   switch (Encoding) {
   default:
     return StringRef();
-#define HANDLE_DW_CFA(ID, NAME)                                               \
-  case DW_CFA_##NAME:                                                         \
+#define HANDLE_DW_CFA(ID, NAME)                                                \
+  case DW_CFA_##NAME:                                                          \
     return "DW_CFA_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -433,10 +469,10 @@ StringRef llvm::dwarf::ApplePropertyString(unsigned Prop) {
   switch (Prop) {
   default:
     return StringRef();
-#define HANDLE_DW_APPLE_PROPERTY(ID, NAME)                                               \
-  case DW_APPLE_PROPERTY_##NAME:                                                         \
+#define HANDLE_DW_APPLE_PROPERTY(ID, NAME)                                     \
+  case DW_APPLE_PROPERTY_##NAME:                                               \
     return "DW_APPLE_PROPERTY_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
@@ -447,7 +483,7 @@ StringRef llvm::dwarf::UnitTypeString(unsigned UT) {
 #define HANDLE_DW_UT(ID, NAME)                                                 \
   case DW_UT_##NAME:                                                           \
     return "DW_UT_" #NAME;
-#include "llvm/Support/Dwarf.def"
+#include "llvm/BinaryFormat/Dwarf.def"
   }
 }
 
diff --git a/lib/BinaryFormat/LLVMBuild.txt b/lib/BinaryFormat/LLVMBuild.txt
new file mode 100644
index 000000000000..d7d4dcb5f23d
--- /dev/null
+++ b/lib/BinaryFormat/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/BinaryFormat/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = BinaryFormat
+parent = Libraries
+required_libraries = Support
diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp
new file mode 100644
index 000000000000..ca4d93f99d92
--- /dev/null
+++ b/lib/BinaryFormat/Magic.cpp
@@ -0,0 +1,216 @@
+//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/Magic.h"
+
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/FileSystem.h"
+
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+using namespace llvm;
+using namespace llvm::support::endian;
+using namespace llvm::sys::fs;
+
+template <size_t N> // N counts the literal's terminating NUL, hence N - 1.
+static bool startswith(StringRef Magic, const char (&S)[N]) {
+  return Magic.startswith(StringRef(S, N - 1));
+}
+
+/// Identify the kind of file from the leading bytes held in \p Magic.
+file_magic llvm::identify_magic(StringRef Magic) {
+  if (Magic.size() < 4) // The checks below index the first few bytes directly.
+    return file_magic::unknown;
+  switch ((unsigned char)Magic[0]) {
+  case 0x00: {
+    // COFF bigobj, CL.exe's LTO object file, or short import library file
+    if (startswith(Magic, "\0\0\xFF\xFF")) {
+      size_t MinSize =
+          offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
+      if (Magic.size() < MinSize)
+        return file_magic::coff_import_library;
+
+      const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
+      if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
+        return file_magic::coff_object;
+      if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
+        return file_magic::coff_cl_gl_object;
+      return file_magic::coff_import_library;
+    }
+    // Windows resource file
+    if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF"))
+      return file_magic::windows_resource;
+    // 0x0000 = COFF unknown machine type
+    if (Magic[1] == 0)
+      return file_magic::coff_object;
+    if (startswith(Magic, "\0asm"))
+      return file_magic::wasm_object;
+    break;
+  }
+  case 0xDE: // 0x0B17C0DE = BC wrapper
+    if (startswith(Magic, "\xDE\xC0\x17\x0B"))
+      return file_magic::bitcode;
+    break;
+  case 'B':
+    if (startswith(Magic, "BC\xC0\xDE"))
+      return file_magic::bitcode;
+    break;
+  case '!':
+    if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
+      return file_magic::archive;
+    break;
+
+  case '\177':
+    if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
+      bool Data2MSB = Magic[5] == 2;
+      unsigned high = Data2MSB ? 16 : 17;
+      unsigned low = Data2MSB ? 17 : 16;
+      if (Magic[high] == 0) {
+        switch (Magic[low]) {
+        default:
+          return file_magic::elf;
+        case 1:
+          return file_magic::elf_relocatable;
+        case 2:
+          return file_magic::elf_executable;
+        case 3:
+          return file_magic::elf_shared_object;
+        case 4:
+          return file_magic::elf_core;
+        }
+      }
+      // It's still some type of ELF file.
+      return file_magic::elf;
+    }
+    break;
+
+  case 0xCA:
+    if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
+        startswith(Magic, "\xCA\xFE\xBA\xBF")) {
+      // This is complicated by an overlap with Java class files.
+      // See the Mach-O section in /usr/share/file/magic for details.
+      if (Magic.size() >= 8 && Magic[7] < 43)
+        return file_magic::macho_universal_binary;
+    }
+    break;
+
+  // The two magic numbers for mach-o are:
+  // 0xfeedface - 32-bit mach-o
+  // 0xfeedfacf - 64-bit mach-o
+  case 0xFE:
+  case 0xCE:
+  case 0xCF: {
+    uint16_t type = 0;
+    if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
+        startswith(Magic, "\xFE\xED\xFA\xCF")) {
+      /* Native endian: file type is stored most-significant byte first */
+      size_t MinSize;
+      if (Magic[3] == char(0xCE))
+        MinSize = sizeof(MachO::mach_header);
+      else
+        MinSize = sizeof(MachO::mach_header_64);
+      if (Magic.size() >= MinSize)
+        type = Magic[12] << 24 | Magic[13] << 16 | Magic[14] << 8 | Magic[15];
+    } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
+               startswith(Magic, "\xCF\xFA\xED\xFE")) {
+      /* Reverse endian: file type is stored least-significant byte first */
+      size_t MinSize;
+      if (Magic[0] == char(0xCE))
+        MinSize = sizeof(MachO::mach_header);
+      else
+        MinSize = sizeof(MachO::mach_header_64);
+      if (Magic.size() >= MinSize)
+        type = Magic[15] << 24 | Magic[14] << 16 | Magic[13] << 8 | Magic[12];
+    }
+    switch (type) {
+    default:
+      break;
+    case 1:
+      return file_magic::macho_object;
+    case 2:
+      return file_magic::macho_executable;
+    case 3:
+      return file_magic::macho_fixed_virtual_memory_shared_lib;
+    case 4:
+      return file_magic::macho_core;
+    case 5:
+      return file_magic::macho_preload_executable;
+    case 6:
+      return file_magic::macho_dynamically_linked_shared_lib;
+    case 7:
+      return file_magic::macho_dynamic_linker;
+    case 8:
+      return file_magic::macho_bundle;
+    case 9:
+      return file_magic::macho_dynamically_linked_shared_lib_stub;
+    case 10:
+      return file_magic::macho_dsym_companion;
+    case 11:
+      return file_magic::macho_kext_bundle;
+    }
+    break;
+  }
+  case 0xF0: // PowerPC Windows
+  case 0x83: // Alpha 32-bit
+  case 0x84: // Alpha 64-bit
+  case 0x66: // MIPS R4000 Windows
+  case 0x50: // mc68K
+  case 0x4c: // 80386 Windows
+  case 0xc4: // ARMNT Windows
+    if (Magic[1] == 0x01)
+      return file_magic::coff_object;
+    LLVM_FALLTHROUGH;
+
+  case 0x90: // PA-RISC Windows
+  case 0x68: // mc68K Windows
+    if (Magic[1] == 0x02)
+      return file_magic::coff_object;
+    break;
+
+  case 'M': // Possible MS-DOS stub on Windows PE file
+    if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
+      uint32_t off = read32le(Magic.data() + 0x3c);
+      // PE/COFF file, either EXE or DLL.
+      if (off <= Magic.size() - sizeof(COFF::PEMagic) &&
+          memcmp(Magic.data() + off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0)
+        return file_magic::pecoff_executable;
+    }
+    break;
+
+  case 0x64: // x86-64 Windows.
+    if (Magic[1] == char(0x86))
+      return file_magic::coff_object;
+    break;
+
+  default:
+    break;
+  }
+  return file_magic::unknown;
+}
+
+std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
+  int FD;
+  if (std::error_code OpenEC = openFileForRead(Path, FD))
+    return OpenEC;
+  // Read at most 32 leading bytes; the descriptor is closed before any return.
+  char Header[32];
+  const int NumRead = read(FD, Header, sizeof(Header));
+  const bool CloseFailed = close(FD) != 0;
+  if (CloseFailed || NumRead < 0)
+    return std::error_code(errno, std::generic_category());
+
+  Result = identify_magic(StringRef(Header, NumRead));
+  return std::error_code();
+}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index fffa9045b2fd..95987fac74e1 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -28,8 +28,8 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/AutoUpgrade.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Comdat.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -40,13 +40,13 @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GVMaterializer.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalIFunc.h"
 #include "llvm/IR/GlobalIndirectSymbol.h"
 #include "llvm/IR/GlobalObject.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/GVMaterializer.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstrTypes.h"
@@ -5370,12 +5370,20 @@ static Expected<StringRef> readStrtab(BitstreamCursor &Stream) {
 
 Expected<std::vector<BitcodeModule>>
 llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
+  auto FOrErr = getBitcodeFileContents(Buffer);
+  if (!FOrErr)
+    return FOrErr.takeError();
+  return std::move(FOrErr->Mods);
+}
+
+Expected<BitcodeFileContents>
+llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
   Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
   if (!StreamOrErr)
     return StreamOrErr.takeError();
   BitstreamCursor &Stream = *StreamOrErr;
 
-  std::vector<BitcodeModule> Modules;
+  BitcodeFileContents F;
   while (true) {
     uint64_t BCBegin = Stream.getCurrentByteNo();
 
@@ -5383,7 +5391,7 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
     // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to
     // the end that there cannot possibly be another module, stop looking.
     if (BCBegin + 8 >= Stream.getBitcodeBytes().size())
-      return Modules;
+      return F;
 
     BitstreamEntry Entry = Stream.advance();
     switch (Entry.Kind) {
@@ -5409,10 +5417,10 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
         if (Stream.SkipBlock())
           return error("Malformed block");
 
-        Modules.push_back({Stream.getBitcodeBytes().slice(
-                               BCBegin, Stream.getCurrentByteNo() - BCBegin),
-                           Buffer.getBufferIdentifier(), IdentificationBit,
-                           ModuleBit});
+        F.Mods.push_back({Stream.getBitcodeBytes().slice(
+                              BCBegin, Stream.getCurrentByteNo() - BCBegin),
+                          Buffer.getBufferIdentifier(), IdentificationBit,
+                          ModuleBit});
         continue;
       }
 
@@ -5424,7 +5432,7 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
         // not have its own string table. A bitcode file may have multiple
         // string tables if it was created by binary concatenation, for example
         // with "llvm-cat -b".
-        for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) {
+        for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) {
           if (!I->Strtab.empty())
             break;
           I->Strtab = *Strtab;
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index d80e1da911ca..ee2fe2a0cc18 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -53,8 +53,8 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9043b8c12d25..d5879fec95cb 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3305,7 +3305,15 @@ static const uint64_t INDEX_VERSION = 3;
 /// Emit the per-module summary section alongside the rest of
 /// the module's bitcode.
 void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
-  Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
+  // By default we compile with ThinLTO if the module has a summary, but the
+  // client can request full LTO with a module flag.
+  bool IsThinLTO = true;
+  if (auto *MD =
+          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
+    IsThinLTO = MD->getZExtValue();
+  Stream.EnterSubblock(IsThinLTO ? bitc::GLOBALVAL_SUMMARY_BLOCK_ID
+                                 : bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID,
+                       4);
 
   Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
 
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 73fc2b35fe4e..f7c09be15fb7 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -4,6 +4,7 @@
 add_subdirectory(IR)
 add_subdirectory(IRReader)
 add_subdirectory(CodeGen)
+add_subdirectory(BinaryFormat)
 add_subdirectory(Bitcode)
 add_subdirectory(Transforms)
 add_subdirectory(Linker)
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 09a37a77e9fb..c2aecc651b79 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -24,8 +24,8 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 61149d9229b7..8b1376ab363d 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -14,6 +14,7 @@
 #include "DwarfException.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -27,7 +28,6 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e61e22abe82a..407d5623d670 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/AsmPrinter.h"
 #include "AsmPrinterHandler.h"
 #include "CodeViewDebug.h"
 #include "DwarfDebug.h"
@@ -19,18 +20,19 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/ObjectUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/GCStrategy.h"
@@ -82,14 +84,12 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 0185c380cc39..0edf9051d342 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -15,6 +15,7 @@
 #include "DwarfDebug.h"
 #include "DwarfExpression.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -26,7 +27,6 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index a0bf1632dff3..eae79ad101d3 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 385c78bbccef..e94616fd5900 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
+//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,37 +12,82 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeViewDebug.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/Line.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
 #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
-#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 using namespace llvm::codeview;
 
 CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
-    : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(),
-      TypeTable(Allocator), CurFn(nullptr) {
+    : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
   // If module doesn't have named metadata anchors or COFF debug section
   // is not available, skip any debug info related stuff.
   if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
@@ -178,7 +223,8 @@ static const DISubprogram *getQualifiedNameComponents(
 static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents,
                                     StringRef TypeName) {
   std::string FullyQualifiedName;
-  for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) {
+  for (StringRef QualifiedNameComponent :
+       llvm::reverse(QualifiedNameComponents)) {
     FullyQualifiedName.append(QualifiedNameComponent);
     FullyQualifiedName.append("::");
   }
@@ -571,7 +617,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
   }
 }
 
-}  // anonymous namespace
+} // end anonymous namespace
 
 void CodeViewDebug::emitCompilerInformation() {
   MCContext &Context = MMI->getContext();
@@ -1581,11 +1627,11 @@ struct llvm::ClassInfo {
     uint64_t BaseOffset;
   };
   // [MemberInfo]
-  typedef std::vector<MemberInfo> MemberList;
+  using MemberList = std::vector<MemberInfo>;
 
-  typedef TinyPtrVector<const DISubprogram *> MethodsList;
+  using MethodsList = TinyPtrVector<const DISubprogram *>;
   // MethodName -> MethodsList
-  typedef MapVector<MDString *, MethodsList> MethodsMap;
+  using MethodsMap = MapVector<MDString *, MethodsList>;
 
   /// Base classes.
   std::vector<const DIDerivedType *> Inheritance;
@@ -1850,7 +1896,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
           translateMethodOptionFlags(SP), VFTableOffset, Name));
       MemberCount++;
     }
-    assert(Methods.size() > 0 && "Empty methods map entry");
+    assert(!Methods.empty() && "Empty methods map entry");
     if (Methods.size() == 1)
       FLBR.writeMemberType(Methods[0]);
     else {
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 1c0c1644edaf..2cd495aec6dc 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -1,4 +1,4 @@
-//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===//
+//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,29 +14,44 @@
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
 
+#include "DbgValueHistoryCalculator.h"
 #include "DebugHandlerBase.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
 #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
+#include <map>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
 
 namespace llvm {
 
-class StringRef;
-class LexicalScope;
 struct ClassInfo;
+class StringRef;
+class AsmPrinter;
+class Function;
+class GlobalVariable;
+class MCSectionCOFF;
+class MCStreamer;
+class MCSymbol;
+class MachineFunction;
 
 /// \brief Collects and handles line tables information in a CodeView format.
 class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   MCStreamer &OS;
-  llvm::BumpPtrAllocator Allocator;
+  BumpPtrAllocator Allocator;
   codeview::TypeTableBuilder TypeTable;
 
   /// Represents the most general definition range.
@@ -110,7 +125,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
     unsigned LastFileId = 0;
     bool HaveLineInfo = false;
   };
-  FunctionInfo *CurFn;
+  FunctionInfo *CurFn = nullptr;
 
   /// The set of comdat .debug$S sections that we've seen so far. Each section
   /// must start with a magic version number that must only be emitted once.
@@ -176,8 +191,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs,
       GlobalUDTs;
 
-  typedef std::map<const DIFile *, std::string> FileToFilepathMapTy;
+  using FileToFilepathMapTy = std::map<const DIFile *, std::string>;
   FileToFilepathMapTy FileToFilepathMap;
+
   StringRef getFullFilepath(const DIFile *S);
 
   unsigned maybeRecordFile(const DIFile *F);
@@ -223,7 +239,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
   void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
                            const InlineSite &Site);
 
-  typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+  using InlinedVariable = DbgValueHistoryMap::InlinedVariable;
 
   void collectVariableInfo(const DISubprogram *SP);
 
@@ -309,7 +325,7 @@ protected:
 public:
   CodeViewDebug(AsmPrinter *Asm);
 
-  void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
+  void setSymbolSize(const MCSymbol *, uint64_t) override {}
 
   /// \brief Emit the COFF section that holds the line table information.
   void endModule() override;
@@ -317,6 +333,7 @@ public:
   /// \brief Process beginning of an instruction.
   void beginInstruction(const MachineInstr *MI) override;
 };
-} // End of namespace llvm
 
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 201030f0ac5c..15ade3c96dfe 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -11,15 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ByteStreamer.h"
 #include "DIEHash.h"
+#include "ByteStreamer.h"
 #include "DwarfDebug.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 3656e9d95099..0c551dfff9cc 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -10,9 +10,9 @@
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
 
+#include "ByteStreamer.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
-#include "ByteStreamer.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 05ac1cb02f76..b1ef8cfe989d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -16,12 +16,12 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index e08306b001fb..dd7f7931b06b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -14,6 +14,7 @@
 #include "DwarfException.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -28,7 +29,6 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index b8f57472f17c..3c2fb8d99db7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,8 +15,8 @@
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
 
 #include "DwarfUnit.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/DebugInfo.h"
-#include "llvm/Support/Dwarf.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index bf27516e1ccd..e3fd21a1fd70 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -38,7 +39,6 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index ccd326917bfd..d96479f43433 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -14,8 +14,8 @@
 #include "DwarfExpression.h"
 #include "DwarfDebug.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 667afbb450bd..7f7d3e650e02 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -18,19 +18,19 @@
 #include "DwarfExpression.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/MC/MachineLocation.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 342efc3611c7..c5795559fb7d 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
@@ -25,8 +26,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetLoweringObjectFile.h" 
-#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
index 2bb66d12f376..bde8148d259b 100644
--- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = AsmPrinter
 parent = Libraries
-required_libraries = Analysis CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target
+required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 8baee4db772e..035f1a0063aa 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,23 +11,27 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/GCs.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCs.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
 using namespace llvm;
 
 namespace {
@@ -37,7 +41,8 @@ public:
   void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
   void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
 };
-}
+
+} // end anonymous namespace
 
 static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
     Y("ocaml", "ocaml 3.10-compatible collector");
@@ -50,7 +55,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
   std::string SymName;
   SymName += "caml";
   size_t Letter = SymName.size();
-  SymName.append(MId.begin(), find(MId, '.'));
+  SymName.append(MId.begin(), llvm::find(MId, '.'));
   SymName += "__";
   SymName += Id;
 
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 815658bfb637..5d485f213573 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -14,6 +14,8 @@
 #include "WinException.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -29,8 +31,6 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCWin64EH.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index d3fced436b68..be93ff0dad29 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -15,10 +15,10 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/CommandLine.h"
 #include <utility>
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 03ceac10beec..530954976292 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1,4 +1,4 @@
-//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//===- BranchFolding.cpp - Fold machine code branch instructions ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,30 +18,46 @@
 //===----------------------------------------------------------------------===//
 
 #include "BranchFolding.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <numeric>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "branch-folder"
@@ -69,10 +85,12 @@ TailMergeSize("tail-merge-size",
                               cl::init(3), cl::Hidden);
 
 namespace {
+
   /// BranchFolderPass - Wrap branch folder in a machine function pass.
   class BranchFolderPass : public MachineFunctionPass {
   public:
     static char ID;
+
     explicit BranchFolderPass(): MachineFunctionPass(ID) {}
 
     bool runOnMachineFunction(MachineFunction &MF) override;
@@ -84,7 +102,8 @@ namespace {
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
-}
+
+} // end anonymous namespace
 
 char BranchFolderPass::ID = 0;
 char &llvm::BranchFolderPassID = BranchFolderPass::ID;
@@ -368,7 +387,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
 
   // Create the fall-through block.
   MachineFunction::iterator MBBI = CurMBB.getIterator();
-  MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB);
+  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB);
   CurMBB.getParent()->insert(++MBBI, NewMBB);
 
   // Move all the successors of this block to the specified block.
@@ -506,7 +525,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
                                  MachineBasicBlock::iterator &I) {
   I = MBB->end();
   unsigned NumTerms = 0;
-  for (;;) {
+  while (true) {
     if (I == MBB->begin()) {
       I = MBB->end();
       break;
@@ -1601,7 +1620,6 @@ ReoptimizeBlock:
   // block doesn't fall through into some other block, see if we can find a
   // place to move this block where a fall-through will happen.
   if (!PrevBB.canFallThrough()) {
-
     // Now we know that there was no fall-through into this block, check to
     // see if it has a fall-through into its successor.
     bool CurFallsThru = MBB->canFallThrough();
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index e3de61c7816f..27ee12c4c5ff 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -7,17 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index e4eab8c513d9..abac555d6602 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/GCs.h"
 #include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCs.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Support/Casting.h"
 
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index dc2d38a95f99..c2ced19458ed 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 256a0c95d365..faa5f139cf7b 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/InitializePasses.h"
 #include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassRegistry.h"
 
 using namespace llvm;
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 568b278dd47c..c2037cb7f1ae 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -13,8 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
@@ -31,6 +29,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -236,12 +235,12 @@ class TypePromotionTransaction;
     void eliminateMostlyEmptyBlock(BasicBlock *BB);
     bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                        bool isPreheader);
-    bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
-    bool optimizeInst(Instruction *I, bool& ModifiedDT);
+    bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
+    bool optimizeInst(Instruction *I, bool &ModifiedDT);
     bool optimizeMemoryInst(Instruction *I, Value *Addr,
                             Type *AccessTy, unsigned AS);
     bool optimizeInlineAsmInst(CallInst *CS);
-    bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
+    bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
     bool optimizeExt(Instruction *&I);
     bool optimizeExtUses(Instruction *I);
     bool optimizeLoadExt(LoadInst *I);
@@ -1662,25 +1661,29 @@ class MemCmpExpansion {
   BasicBlock *EndBlock;
   PHINode *PhiRes;
   bool IsUsedForZeroCmp;
+  const DataLayout &DL;
+
   int calculateNumBlocks(unsigned Size);
   void createLoadCmpBlocks();
   void createResultBlock();
   void setupResultBlockPHINodes();
   void setupEndBlockPHINodes();
-  void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex,
-                            bool IsLittleEndian);
+  void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex);
+  Value *getCompareLoadPairs(unsigned Index, unsigned Size,
+                             unsigned &NumBytesProcessed, IRBuilder<> &Builder);
   void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
                                          unsigned &NumBytesProcessed);
   void emitLoadCompareByteBlock(unsigned Index, int GEPIndex);
-  void emitMemCmpResultBlock(bool IsLittleEndian);
-  Value *getMemCmpExpansionZeroCase(unsigned Size, bool IsLittleEndian);
+  void emitMemCmpResultBlock();
+  Value *getMemCmpExpansionZeroCase(unsigned Size);
+  Value *getMemCmpEqZeroOneBlock(unsigned Size);
   unsigned getLoadSize(unsigned Size);
   unsigned getNumLoads(unsigned Size);
 
 public:
-  MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize,
-                  unsigned NumLoadsPerBlock);
-  Value *getMemCmpExpansion(bool IsLittleEndian);
+  MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
+                  unsigned NumLoadsPerBlock, const DataLayout &DL);
+  Value *getMemCmpExpansion(uint64_t Size);
 };
 
 MemCmpExpansion::ResultBlock::ResultBlock()
@@ -1694,39 +1697,41 @@ MemCmpExpansion::ResultBlock::ResultBlock()
 // return from.
 // 3. ResultBlock, block to branch to for early exit when a
 // LoadCmpBlock finds a difference.
-MemCmpExpansion::MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize,
-                                 unsigned NumLoadsPerBlock)
-    : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(NumLoadsPerBlock) {
+MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
+                                 unsigned MaxLoadSize, unsigned LoadsPerBlock,
+                                 const DataLayout &TheDataLayout)
+    : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
+      DL(TheDataLayout) {
+
+  // A memcmp with zero-comparison with only one block of load and compare does
+  // not need to set up any extra blocks. This case could be handled in the DAG,
+  // but since we have all of the machinery to flexibly expand any memcmp here,
+  // we choose to handle this case too to avoid fragmented lowering.
+  IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+  NumBlocks = calculateNumBlocks(Size);
+  if (!IsUsedForZeroCmp || NumBlocks != 1) {
+    BasicBlock *StartBlock = CI->getParent();
+    EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
+    setupEndBlockPHINodes();
+    createResultBlock();
+
+    // If return value of memcmp is not used in a zero equality, we need to
+    // calculate which source was larger. The calculation requires the
+    // two loaded source values of each load compare block.
+    // These will be saved in the phi nodes created by setupResultBlockPHINodes.
+    if (!IsUsedForZeroCmp)
+      setupResultBlockPHINodes();
+
+    // Create the number of required load compare basic blocks.
+    createLoadCmpBlocks();
+
+    // Update the terminator added by splitBasicBlock to branch to the first
+    // LoadCmpBlock.
+    StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
+  }
 
   IRBuilder<> Builder(CI->getContext());
-
-  BasicBlock *StartBlock = CI->getParent();
-  EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
-  setupEndBlockPHINodes();
-  IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
-
-  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-  uint64_t Size = SizeCast->getZExtValue();
-
-  // Calculate how many load compare blocks are required for an expansion of
-  // given Size.
-  NumBlocks = calculateNumBlocks(Size);
-  createResultBlock();
-
-  // If return value of memcmp is not used in a zero equality, we need to
-  // calculate which source was larger. The calculation requires the
-  // two loaded source values of each load compare block.
-  // These will be saved in the phi nodes created by setupResultBlockPHINodes.
-  if (!IsUsedForZeroCmp)
-    setupResultBlockPHINodes();
-
-  // Create the number of required load compare basic blocks.
-  createLoadCmpBlocks();
-
-  // Update the terminator added by splitBasicBlock to branch to the first
-  // LoadCmpBlock.
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-  StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
 }
 
 void MemCmpExpansion::createLoadCmpBlocks() {
@@ -1743,7 +1748,7 @@ void MemCmpExpansion::createResultBlock() {
 }
 
 // This function creates the IR instructions for loading and comparing 1 byte.
-// It loads 1 byte from each source of the memcmp paramters with the given
+// It loads 1 byte from each source of the memcmp parameters with the given
 // GEPIndex. It then subtracts the two loaded values and adds this result to the
 // final phi node for selecting the memcmp result.
 void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) {
@@ -1754,13 +1759,13 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) {
 
   Builder.SetInsertPoint(LoadCmpBlocks[Index]);
   Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
-  // Cast source to LoadSizeType*
+  // Cast source to LoadSizeType*.
   if (Source1->getType() != LoadSizeType)
     Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
   if (Source2->getType() != LoadSizeType)
     Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
 
-  // Get the base address using the GEPIndex
+  // Get the base address using the GEPIndex.
   if (GEPIndex != 0) {
     Source1 = Builder.CreateGEP(LoadSizeType, Source1,
                                 ConstantInt::get(LoadSizeType, GEPIndex));
@@ -1778,16 +1783,15 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) {
   PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]);
 
   if (Index < (LoadCmpBlocks.size() - 1)) {
-    // Early exit branch if difference found to EndBlock, otherwise continue to
-    // next LoadCmpBlock
-
+    // Early exit branch to EndBlock if a difference is found. Otherwise,
+    // continue to the next LoadCmpBlock.
     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
                                     ConstantInt::get(Diff->getType(), 0));
     BranchInst *CmpBr =
         BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp);
     Builder.Insert(CmpBr);
   } else {
-    // The last block has an unconditional branch to EndBlock
+    // The last block has an unconditional branch to EndBlock.
     BranchInst *CmpBr = BranchInst::Create(EndBlock);
     Builder.Insert(CmpBr);
   }
@@ -1801,11 +1805,12 @@ unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
   return MinAlign(PowerOf2Floor(Size), MaxLoadSize);
 }
 
-void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
-    unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
-
-  IRBuilder<> Builder(CI->getContext());
-
+/// Generate an equality comparison for one or more pairs of loaded values.
+/// This is used in the case where the memcmp() call is compared equal or not
+/// equal to zero.
+Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
+                                            unsigned &NumBytesProcessed,
+                                            IRBuilder<> &Builder) {
   std::vector<Value *> XorList, OrList;
   Value *Diff;
 
@@ -1813,8 +1818,13 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
   unsigned NumLoadsRemaining = getNumLoads(RemainingBytes);
   unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
 
-  Builder.SetInsertPoint(LoadCmpBlocks[Index]);
+  // For a single-block expansion, start inserting before the memcmp call.
+  if (LoadCmpBlocks.empty())
+    Builder.SetInsertPoint(CI);
+  else
+    Builder.SetInsertPoint(LoadCmpBlocks[Index]);
 
+  Value *Cmp = nullptr;
   for (unsigned i = 0; i < NumLoads; ++i) {
     unsigned LoadSize = getLoadSize(RemainingBytes);
     unsigned GEPIndex = NumBytesProcessed / LoadSize;
@@ -1827,13 +1837,13 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
     Value *Source1 = CI->getArgOperand(0);
     Value *Source2 = CI->getArgOperand(1);
 
-    // Cast source to LoadSizeType*
+    // Cast source to LoadSizeType*.
     if (Source1->getType() != LoadSizeType)
       Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
     if (Source2->getType() != LoadSizeType)
       Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
 
-    // Get the base address using the GEPIndex
+    // Get the base address using the GEPIndex.
     if (GEPIndex != 0) {
       Source1 = Builder.CreateGEP(LoadSizeType, Source1,
                                   ConstantInt::get(LoadSizeType, GEPIndex));
@@ -1841,16 +1851,23 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
                                   ConstantInt::get(LoadSizeType, GEPIndex));
     }
 
-    // Load LoadSizeType from the base address
+    // Load LoadSizeType from the base address.
     Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
     Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-    if (LoadSizeType != MaxLoadType) {
-      LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
-      LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+    if (NumLoads != 1) {
+      if (LoadSizeType != MaxLoadType) {
+        LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
+        LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
+      }
+      // If we have multiple loads per block, we need to generate a composite
+      // comparison using xor+or.
+      Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+      Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
+      XorList.push_back(Diff);
+    } else {
+      // If there's only one load per block, we just compare the loaded values.
+      Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
     }
-    Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
-    Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
-    XorList.push_back(Diff);
   }
 
   auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
@@ -1864,27 +1881,36 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
     return OutList;
   };
 
-  // Pair wise OR the XOR results
-  OrList = pairWiseOr(XorList);
+  if (!Cmp) {
+    // Pairwise OR the XOR results.
+    OrList = pairWiseOr(XorList);
 
-  // Pair wise OR the OR results until one result left
-  while (OrList.size() != 1) {
-    OrList = pairWiseOr(OrList);
+    // Pairwise OR the OR results until one result is left.
+    while (OrList.size() != 1) {
+      OrList = pairWiseOr(OrList);
+    }
+    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
   }
 
-  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0],
-                                  ConstantInt::get(Diff->getType(), 0));
+  return Cmp;
+}
+
+void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
+    unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
+  IRBuilder<> Builder(CI->getContext());
+  Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed, Builder);
+
   BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
                            ? EndBlock
                            : LoadCmpBlocks[Index + 1];
-  // Early exit branch if difference found to ResultBlock, otherwise continue to
-  // next LoadCmpBlock or EndBlock.
+  // Early exit branch to ResultBlock if a difference is found. Otherwise,
+  // continue to the next LoadCmpBlock or EndBlock.
   BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
   Builder.Insert(CmpBr);
 
   // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
   // since early exit to ResultBlock was not taken (no difference was found in
-  // any of the bytes)
+  // any of the bytes).
   if (Index == LoadCmpBlocks.size() - 1) {
     Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
     PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
@@ -1901,7 +1927,7 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
 // a special case through emitLoadCompareByteBlock. The special handling can
 // simply subtract the loaded values and add it to the result phi node.
 void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
-                                           int GEPIndex, bool IsLittleEndian) {
+                                           int GEPIndex) {
   if (LoadSize == 1) {
     MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
     return;
@@ -1916,13 +1942,13 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
   Value *Source2 = CI->getArgOperand(1);
 
   Builder.SetInsertPoint(LoadCmpBlocks[Index]);
-  // Cast source to LoadSizeType*
+  // Cast source to LoadSizeType*.
   if (Source1->getType() != LoadSizeType)
     Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
   if (Source2->getType() != LoadSizeType)
     Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
 
-  // Get the base address using the GEPIndex
+  // Get the base address using the GEPIndex.
   if (GEPIndex != 0) {
     Source1 = Builder.CreateGEP(LoadSizeType, Source1,
                                 ConstantInt::get(LoadSizeType, GEPIndex));
@@ -1930,11 +1956,11 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
                                 ConstantInt::get(LoadSizeType, GEPIndex));
   }
 
-  // Load LoadSizeType from the base address
+  // Load LoadSizeType from the base address.
   Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
   Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
 
-  if (IsLittleEndian) {
+  if (DL.isLittleEndian()) {
     Function *F = LoadCmpBlocks[Index]->getParent();
 
     Function *Bswap = Intrinsic::getDeclaration(F->getParent(),
@@ -1962,14 +1988,14 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
   BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
                            ? EndBlock
                            : LoadCmpBlocks[Index + 1];
-  // Early exit branch if difference found to ResultBlock, otherwise continue to
-  // next LoadCmpBlock or EndBlock.
+  // Early exit branch if difference found to ResultBlock. Otherwise, continue
+  // to next LoadCmpBlock or EndBlock.
   BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
   Builder.Insert(CmpBr);
 
   // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
   // since early exit to ResultBlock was not taken (no difference was found in
-  // any of the bytes)
+  // any of the bytes).
   if (Index == LoadCmpBlocks.size() - 1) {
     Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
     PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
@@ -1979,7 +2005,7 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize,
 // This function populates the ResultBlock with a sequence to calculate the
 // memcmp result. It compares the two loaded source values and returns -1 if
 // src1 < src2 and 1 if src1 > src2.
-void MemCmpExpansion::emitMemCmpResultBlock(bool IsLittleEndian) {
+void MemCmpExpansion::emitMemCmpResultBlock() {
   IRBuilder<> Builder(CI->getContext());
 
   // Special case: if memcmp result is used in a zero equality, result does not
@@ -2010,17 +2036,17 @@ void MemCmpExpansion::emitMemCmpResultBlock(bool IsLittleEndian) {
 
 int MemCmpExpansion::calculateNumBlocks(unsigned Size) {
   int NumBlocks = 0;
-  bool haveOneByteLoad = false;
+  bool HaveOneByteLoad = false;
   unsigned RemainingSize = Size;
   unsigned LoadSize = MaxLoadSize;
   while (RemainingSize) {
     if (LoadSize == 1)
-      haveOneByteLoad = true;
+      HaveOneByteLoad = true;
     NumBlocks += RemainingSize / LoadSize;
     RemainingSize = RemainingSize % LoadSize;
     LoadSize = LoadSize / 2;
   }
-  NumBlocksNonOneByte = haveOneByteLoad ? (NumBlocks - 1) : NumBlocks;
+  NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks;
 
   if (IsUsedForZeroCmp)
     NumBlocks = NumBlocks / NumLoadsPerBlock +
@@ -2046,63 +2072,66 @@ void MemCmpExpansion::setupEndBlockPHINodes() {
   PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
 }
 
-Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size,
-                                                   bool IsLittleEndian) {
+Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
   unsigned NumBytesProcessed = 0;
-  // This loop populates each of the LoadCmpBlocks with IR sequence to handle
-  // multiple loads per block
-  for (unsigned i = 0; i < NumBlocks; ++i) {
+  // This loop populates each of the LoadCmpBlocks with the IR sequence to
+  // handle multiple loads per block.
+  for (unsigned i = 0; i < NumBlocks; ++i)
     emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed);
-  }
 
-  emitMemCmpResultBlock(IsLittleEndian);
+  emitMemCmpResultBlock();
   return PhiRes;
 }
 
+/// A memcmp expansion that compares equality with 0 and only has one block of
+/// load and compare can bypass the compare, branch, and phi IR that is required
+/// in the general case.
+Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
+  unsigned NumBytesProcessed = 0;
+  IRBuilder<> Builder(CI->getContext());
+  Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder);
+  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
+}
+
 // This function expands the memcmp call into an inline expansion and returns
 // the memcmp result.
-Value *MemCmpExpansion::getMemCmpExpansion(bool IsLittleEndian) {
+Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
+  if (IsUsedForZeroCmp)
+    return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
+                            getMemCmpExpansionZeroCase(Size);
 
-  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-  uint64_t Size = SizeCast->getZExtValue();
-
-  int LoadSize = MaxLoadSize;
-  int NumBytesToBeProcessed = Size;
-
-  if (IsUsedForZeroCmp) {
-    return getMemCmpExpansionZeroCase(Size, IsLittleEndian);
-  }
-
-  unsigned Index = 0;
-  // This loop calls emitLoadCompareBlock for comparing SizeVal bytes of the two
-  // memcmp source. It starts with loading using the maximum load size set by
+  // This loop calls emitLoadCompareBlock for comparing Size bytes of the two
+  // memcmp sources. It starts with loading using the maximum load size set by
   // the target. It processes any remaining bytes using a load size which is the
   // next smallest power of 2.
+  int LoadSize = MaxLoadSize;
+  int NumBytesToBeProcessed = Size;
+  unsigned Index = 0;
   while (NumBytesToBeProcessed) {
-    // Calculate how many blocks we can create with the current load size
+    // Calculate how many blocks we can create with the current load size.
     int NumBlocks = NumBytesToBeProcessed / LoadSize;
     int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
     NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
 
     // For each NumBlocks, populate the instruction sequence for loading and
-    // comparing LoadSize bytes
+    // comparing LoadSize bytes.
     while (NumBlocks--) {
-      emitLoadCompareBlock(Index, LoadSize, GEPIndex, IsLittleEndian);
+      emitLoadCompareBlock(Index, LoadSize, GEPIndex);
       Index++;
       GEPIndex++;
     }
-    // Get the next LoadSize to use
+    // Get the next LoadSize to use.
     LoadSize = LoadSize / 2;
   }
 
-  emitMemCmpResultBlock(IsLittleEndian);
+  emitMemCmpResultBlock();
   return PhiRes;
 }
 
 // This function checks to see if an expansion of memcmp can be generated.
 // It checks for constant compare size that is less than the max inline size.
 // If an expansion cannot occur, returns false to leave as a library call.
-// Otherwise, the library call is replaced wtih new IR instruction sequence.
+// Otherwise, the library call is replaced with a new IR instruction sequence.
 /// We want to transform:
 /// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
 /// To:
@@ -2177,27 +2206,25 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   NumMemCmpCalls++;
   IRBuilder<> Builder(CI->getContext());
 
-  // TTI call to check if target would like to expand memcmp and get the
-  // MaxLoadSize
+  // TTI call to check if target would like to expand memcmp. Also, get the
+  // MaxLoadSize.
   unsigned MaxLoadSize;
   if (!TTI->expandMemCmp(CI, MaxLoadSize))
     return false;
 
-  // Early exit from expansion if -Oz
-  if (CI->getParent()->getParent()->optForMinSize()) {
+  // Early exit from expansion if -Oz.
+  if (CI->getFunction()->optForMinSize())
     return false;
-  }
 
-  // Early exit from expansion if size is not a constant
+  // Early exit from expansion if size is not a constant.
   ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
   if (!SizeCast) {
     NumMemCmpNotConstant++;
     return false;
   }
 
-  // Early exit from expansion if size greater than max bytes to load
+  // Early exit from expansion if size greater than max bytes to load.
   uint64_t SizeVal = SizeCast->getZExtValue();
-
   unsigned NumLoads = 0;
   unsigned RemainingSize = SizeVal;
   unsigned LoadSize = MaxLoadSize;
@@ -2207,29 +2234,28 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
     LoadSize = LoadSize / 2;
   }
 
-  if (NumLoads >
-      TLI->getMaxExpandSizeMemcmp(CI->getParent()->getParent()->optForSize())) {
+  if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
     NumMemCmpGreaterThanMax++;
     return false;
   }
 
   NumMemCmpInlined++;
 
-  // MemCmpHelper object, creates and sets up basic blocks required for
-  // expanding memcmp with size SizeVal
+  // MemCmpHelper object creates and sets up basic blocks required for
+  // expanding memcmp with size SizeVal.
   unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock;
-  MemCmpExpansion MemCmpHelper(CI, MaxLoadSize, NumLoadsPerBlock);
+  MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL);
 
-  Value *Res = MemCmpHelper.getMemCmpExpansion(DL->isLittleEndian());
+  Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal);
 
-  // Replace call with result of expansion and erarse call.
+  // Replace call with result of expansion and erase call.
   CI->replaceAllUsesWith(Res);
   CI->eraseFromParent();
 
   return true;
 }
 
-bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
   BasicBlock *BB = CI->getParent();
 
   // Lower inline assembly if we can.
@@ -2382,12 +2408,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
   }
 
   LibFunc Func;
-  if (TLInfo->getLibFunc(*CI->getCalledFunction(), Func) &&
-      Func == LibFunc_memcmp) {
-    if (expandMemCmp(CI, TTI, TLI, DL)) {
-      ModifiedDT = true;
-      return true;
-    }
+  if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) &&
+      Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) {
+    ModifiedDT = true;
+    return true;
   }
   return false;
 }
@@ -3934,7 +3958,7 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                     const TargetLowering &TLI,
                                     const TargetRegisterInfo &TRI) {
-  const Function *F = CI->getParent()->getParent();
+  const Function *F = CI->getFunction();
   TargetLowering::AsmOperandInfoVector TargetConstraints =
       TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
                             ImmutableCallSite(CI));
@@ -4531,7 +4555,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
   bool MadeChange = false;
 
   const TargetRegisterInfo *TRI =
-      TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
+      TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
   TargetLowering::AsmOperandInfoVector TargetConstraints =
       TLI->ParseConstraints(*DL, TRI, CS);
   unsigned ArgNo = 0;
@@ -6015,7 +6039,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
   return true;
 }
 
-bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
   if (InsertedInsts.count(I))
@@ -6170,7 +6194,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL,
 // In this pass we look for GEP and cast instructions that are used
 // across basic blocks and rewrite them to improve basic-block-at-a-time
 // selection.
-bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
   SunkAddrs.clear();
   bool MadeChange = false;
 
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 65f58e5686e0..853b9afa1026 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,49 +23,59 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "packets"
-
 #include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <vector>
 
 using namespace llvm;
 
+#define DEBUG_TYPE "packets"
+
 static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
   cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
+
 static unsigned InstrCount = 0;
 
 // --------------------------------------------------------------------
 // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
 
-namespace {
-  DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
-    return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
-  }
-
-  /// Return the DFAInput for an instruction class input vector.
-  /// This function is used in both DFAPacketizer.cpp and in
-  /// DFAPacketizerEmitter.cpp.
-  DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
-    DFAInput InsnInput = 0;
-    assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
-           "Exceeded maximum number of DFA terms");
-    for (auto U : InsnClass)
-      InsnInput = addDFAFuncUnits(InsnInput, U);
-    return InsnInput;
-  }
+static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
+  return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
 }
+
+/// Return the DFAInput for an instruction class input vector.
+/// This function is used in both DFAPacketizer.cpp and in
+/// DFAPacketizerEmitter.cpp.
+static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
+  DFAInput InsnInput = 0;
+  assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+         "Exceeded maximum number of DFA terms");
+  for (auto U : InsnClass)
+    InsnInput = addDFAFuncUnits(InsnInput, U);
+  return InsnInput;
+}
+
 // --------------------------------------------------------------------
 
 DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
                              const DFAStateInput (*SIT)[2],
                              const unsigned *SET):
-  InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
-  DFAStateEntryTable(SET) {
+  InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) {
   // Make sure DFA types are large enough for the number of terms & resources.
   static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
                     (8 * sizeof(DFAInput)),
@@ -75,7 +85,6 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
       "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
 }
 
-
 // Read the DFA transition table and update CachedTable.
 //
 // Format of the transition tables:
@@ -97,7 +106,6 @@ void DFAPacketizer::ReadTable(unsigned int state) {
       DFAStateInputTable[i][1];
 }
 
-
 // Return the DFAInput for an instruction class.
 DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
   // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
@@ -112,16 +120,14 @@ DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
   return InsnInput;
 }
 
-
 // Return the DFAInput for an instruction class input vector.
 DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
   return getDFAInsnInput(InsnClass);
 }
 
-
 // Check if the resources occupied by a MCInstrDesc are available in the
 // current state.
-bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
   unsigned InsnClass = MID->getSchedClass();
   DFAInput InsnInput = getInsnInput(InsnClass);
   UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
@@ -129,10 +135,9 @@ bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
   return CachedTable.count(StateTrans) != 0;
 }
 
-
 // Reserve the resources occupied by a MCInstrDesc and change the current
 // state to reflect that change.
-void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
   unsigned InsnClass = MID->getSchedClass();
   DFAInput InsnInput = getInsnInput(InsnClass);
   UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
@@ -141,24 +146,22 @@ void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
   CurrentState = CachedTable[StateTrans];
 }
 
-
 // Check if the resources occupied by a machine instruction are available
 // in the current state.
-bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) {
-  const llvm::MCInstrDesc &MID = MI.getDesc();
+bool DFAPacketizer::canReserveResources(MachineInstr &MI) {
+  const MCInstrDesc &MID = MI.getDesc();
   return canReserveResources(&MID);
 }
 
-
 // Reserve the resources occupied by a machine instruction and change the
 // current state to reflect that change.
-void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) {
-  const llvm::MCInstrDesc &MID = MI.getDesc();
+void DFAPacketizer::reserveResources(MachineInstr &MI) {
+  const MCInstrDesc &MID = MI.getDesc();
   reserveResources(&MID);
 }
 
-
 namespace llvm {
+
 // This class extends ScheduleDAGInstrs and overrides the schedule method
 // to build the dependence graph.
 class DefaultVLIWScheduler : public ScheduleDAGInstrs {
@@ -166,9 +169,11 @@ private:
   AliasAnalysis *AA;
   /// Ordered list of DAG postprocessing steps.
   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
 public:
   DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
                        AliasAnalysis *AA);
+
   // Actual scheduling work.
   void schedule() override;
 
@@ -176,11 +181,12 @@ public:
   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
     Mutations.push_back(std::move(Mutation));
   }
+
 protected:
   void postprocessDAG();
 };
-}
 
+} // end namespace llvm
 
 DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
                                            MachineLoopInfo &MLI,
@@ -189,21 +195,18 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
   CanHandleTerminators = true;
 }
 
-
 /// Apply each ScheduleDAGMutation step in order.
 void DefaultVLIWScheduler::postprocessDAG() {
   for (auto &M : Mutations)
     M->apply(this);
 }
 
-
 void DefaultVLIWScheduler::schedule() {
   // Build the scheduling graph.
   buildSchedGraph(AA);
   postprocessDAG();
 }
 
-
 VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
                                        MachineLoopInfo &mli, AliasAnalysis *aa)
     : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
@@ -211,13 +214,11 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
   VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
 }
 
-
 VLIWPacketizerList::~VLIWPacketizerList() {
   delete VLIWScheduler;
   delete ResourceTracker;
 }
 
-
 // End the current packet, bundle packet instructions and reset DFA state.
 void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
                                    MachineBasicBlock::iterator MI) {
@@ -237,7 +238,6 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
   DEBUG(dbgs() << "End packet\n");
 }
 
-
 // Bundle machine instructions into packets.
 void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator BeginItr,
@@ -336,7 +336,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
   VLIWScheduler->finishBlock();
 }
 
-
 // Add a DAG mutation object to the ordered list.
 void VLIWPacketizerList::addMutation(
       std::unique_ptr<ScheduleDAGMutation> Mutation) {
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 265dda16bfa7..91d18e2bcaa6 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 06ae5cd72c85..2f833260bca2 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -12,13 +12,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 88d422a0f545..324ea171293d 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -14,9 +14,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 27cd639b2a49..4ce86f27a7dd 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -12,11 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp
index a40ea28056dd..70dca3b74b2f 100644
--- a/lib/CodeGen/ExpandReductions.cpp
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -12,17 +12,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp
index 43f364128978..2924b011e0c1 100644
--- a/lib/CodeGen/FaultMaps.cpp
+++ b/lib/CodeGen/FaultMaps.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/FaultMaps.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/FaultMaps.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectFileInfo.h"
diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp
index 0bdd5e64a7f2..9c71b18619a1 100644
--- a/lib/CodeGen/FuncletLayout.cpp
+++ b/lib/CodeGen/FuncletLayout.cpp
@@ -11,10 +11,10 @@
 // funclets being contiguous.
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "funclet-layout"
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index be21c7306da1..456fa799e8e1 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -11,22 +11,27 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <memory>
+#include <string>
+
 using namespace llvm;
 
 namespace {
 
 class Printer : public FunctionPass {
   static char ID;
+
   raw_ostream &OS;
 
 public:
@@ -38,7 +43,8 @@ public:
   bool runOnFunction(Function &F) override;
   bool doFinalization(Module &M) override;
 };
-}
+
+} // end anonymous namespace
 
 INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
                 "Create Garbage Collector Module Metadata", false, false)
@@ -48,7 +54,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
 GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
     : F(F), S(S), FrameSize(~0LL) {}
 
-GCFunctionInfo::~GCFunctionInfo() {}
+GCFunctionInfo::~GCFunctionInfo() = default;
 
 // -----------------------------------------------------------------------------
 
@@ -67,7 +73,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
     return *I->second;
 
   GCStrategy *S = getGCStrategy(F.getGC());
-  Functions.push_back(make_unique<GCFunctionInfo>(F, *S));
+  Functions.push_back(llvm::make_unique<GCFunctionInfo>(F, *S));
   GCFunctionInfo *GFI = Functions.back().get();
   FInfoMap[&F] = GFI;
   return *GFI;
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index d183c7f2980b..bc7beb6f6c2d 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GCMetadataPrinter.h"
+
 using namespace llvm;
 
 LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry)
 
-GCMetadataPrinter::GCMetadataPrinter() {}
+GCMetadataPrinter::GCMetadataPrinter() = default;
 
-GCMetadataPrinter::~GCMetadataPrinter() {}
+GCMetadataPrinter::~GCMetadataPrinter() = default;
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index afc18a15aa1c..dccd8e0706ca 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -16,10 +16,10 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -784,6 +784,21 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
       return false;
     MIB.addUse(getOrCreateVReg(*Arg));
   }
+
+  // Add a MachineMemOperand if it is a target mem intrinsic.
+  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+  TargetLowering::IntrinsicInfo Info;
+  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
+  if (TLI.getTgtMemIntrinsic(Info, CI, ID)) {
+    MachineMemOperand::Flags Flags =
+        Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+    Flags |=
+        Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
+    uint64_t Size = Info.memVT.getSizeInBits() >> 3;
+    MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
+                                               Flags, Size, Info.align));
+  }
+
   return true;
 }
 
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index aec379197dfb..1b50489deeba 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -15,7 +15,6 @@
 
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
-#include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 3603f9b7ed93..c6ca49ce24d7 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -553,7 +553,8 @@ bool GlobalMerge::doInitialization(Module &M) {
   // Grab all non-const globals.
   for (auto &GV : M.globals()) {
     // Merge is safe for "normal" internal or external globals only
-    if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection())
+    if (GV.isDeclaration() || GV.isThreadLocal() ||
+        GV.hasSection() || GV.hasImplicitSection())
       continue;
 
     // It's not safe to merge globals that may be preempted
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1c33f3b6800e..c98c9b68ac0e 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "BranchFolding.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
@@ -25,6 +24,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 444416a77008..b831ddfa601a 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -31,21 +31,21 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/FaultMaps.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index b7ab404070b1..4e6a3ec21866 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -857,21 +857,46 @@ void InlineSpiller::insertReload(unsigned NewVReg,
   ++NumReloads;
 }
 
+/// Check if \p Def fully defines a VReg with an undefined value, i.e. it is
+/// an implicit def whose only operand carries no sub-register index. If
+/// that's the case, the value of VReg is actually not relevant.
+static bool isFullUndefDef(const MachineInstr &Def) {
+  if (!Def.isImplicitDef())
+    return false;
+  assert(Def.getNumOperands() == 1 &&
+         "Implicit def with more than one definition");
+  // We can say that the VReg defined by Def is undef only if it is
+  // fully defined by Def. Otherwise, some of the lanes may not be
+  // undef and the value of the VReg matters.
+  return !Def.getOperand(0).getSubReg();
+}
+
 /// insertSpill - Insert a spill of NewVReg after MI.
 void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
                                  MachineBasicBlock::iterator MI) {
   MachineBasicBlock &MBB = *MI->getParent();
 
   MachineInstrSpan MIS(MI);
-  TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
-                          MRI.getRegClass(NewVReg), &TRI);
+  bool IsRealSpill = true;
+  if (isFullUndefDef(*MI)) {
+    // Don't spill undef value: a KILL of NewVReg is inserted in place of
+    // the store. Anything works for undef, in particular keeping the
+    // memory uninitialized is a viable option and it saves code size and
+    // run time.
+    BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
+        .addReg(NewVReg, getKillRegState(isKill));
+    IsRealSpill = false; // No store emitted: skip the merge registration.
+  } else
+    TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
+                            MRI.getRegClass(NewVReg), &TRI);
 
   LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
 
   DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
                                            "spill"));
   ++NumSpills;
-  HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
+  if (IsRealSpill)
+    HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
 }
 
 /// spillAroundUses - insert spill code around each use of Reg.
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index be3b258315bb..f2defb4fd623 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
@@ -31,21 +30,11 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
-// Enable or disable FastISel. Both options are needed, because
-// FastISel is enabled by default with -fast, and we wish to be
-// able to enable or disable fast-isel independently from -O0.
-static cl::opt<cl::boolOrDefault>
-EnableFastISelOption("fast-isel", cl::Hidden,
-  cl::desc("Enable the \"fast\" instruction selector"));
-
-static cl::opt<cl::boolOrDefault>
-    EnableGlobalISel("global-isel", cl::Hidden,
-                     cl::desc("Enable the \"global\" instruction selector"));
-
 void LLVMTargetMachine::initAsmInfo() {
   MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
   MII = TheTarget.createMCInstrInfo();
@@ -71,8 +60,7 @@ void LLVMTargetMachine::initAsmInfo() {
 
   TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments);
 
-  if (Options.CompressDebugSections)
-    TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu);
+  TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections);
 
   TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations);
 
@@ -106,9 +94,7 @@ static MCContext *
 addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
                         bool DisableVerify, AnalysisID StartBefore,
                         AnalysisID StartAfter, AnalysisID StopBefore,
-                        AnalysisID StopAfter,
-                        MachineFunctionInitializer *MFInitializer = nullptr) {
-
+                        AnalysisID StopAfter) {
   // Targets may override createPassConfig to provide a target-specific
   // subclass.
   TargetPassConfig *PassConfig = TM->createPassConfig(PM);
@@ -117,99 +103,22 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
   // Set PassConfig options provided by TargetMachine.
   PassConfig->setDisableVerify(DisableVerify);
   PM.add(PassConfig);
-
-  // When in emulated TLS mode, add the LowerEmuTLS pass.
-  if (TM->Options.EmulatedTLS)
-    PM.add(createLowerEmuTLSPass());
-
-  PM.add(createPreISelIntrinsicLoweringPass());
-
-  // Add internal analysis passes from the target machine.
-  PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
-
-  PassConfig->addIRPasses();
-
-  PassConfig->addCodeGenPrepare();
-
-  PassConfig->addPassesToHandleExceptions();
-
-  PassConfig->addISelPrepare();
-
   MachineModuleInfo *MMI = new MachineModuleInfo(TM);
-  MMI->setMachineFunctionInitializer(MFInitializer);
   PM.add(MMI);
 
-  // Enable FastISel with -fast, but allow that to be overridden.
-  TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
-  if (EnableFastISelOption == cl::BOU_TRUE ||
-      (TM->getOptLevel() == CodeGenOpt::None &&
-       TM->getO0WantsFastISel()))
-    TM->setFastISel(true);
-
-  // Ask the target for an isel.
-  // Enable GlobalISel if the target wants to, but allow that to be overriden.
-  if (EnableGlobalISel == cl::BOU_TRUE || (EnableGlobalISel == cl::BOU_UNSET &&
-                                           PassConfig->isGlobalISelEnabled())) {
-    if (PassConfig->addIRTranslator())
-      return nullptr;
-
-    PassConfig->addPreLegalizeMachineIR();
-
-    if (PassConfig->addLegalizeMachineIR())
-      return nullptr;
-
-    // Before running the register bank selector, ask the target if it
-    // wants to run some passes.
-    PassConfig->addPreRegBankSelect();
-
-    if (PassConfig->addRegBankSelect())
-      return nullptr;
-
-    PassConfig->addPreGlobalInstructionSelect();
-
-    if (PassConfig->addGlobalInstructionSelect())
-      return nullptr;
-
-    // Pass to reset the MachineFunction if the ISel failed.
-    PM.add(createResetMachineFunctionPass(
-        PassConfig->reportDiagnosticWhenGlobalISelFallback(),
-        PassConfig->isGlobalISelAbortEnabled()));
-
-    // Provide a fallback path when we do not want to abort on
-    // not-yet-supported input.
-    if (!PassConfig->isGlobalISelAbortEnabled() &&
-        PassConfig->addInstSelector())
-      return nullptr;
-
-  } else if (PassConfig->addInstSelector())
+  if (PassConfig->addISelPasses())
     return nullptr;
-
   PassConfig->addMachinePasses();
-
   PassConfig->setInitialized();
 
   return &MMI->getContext();
 }
 
-bool LLVMTargetMachine::addPassesToEmitFile(
-    PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
-    bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
-    AnalysisID StopBefore, AnalysisID StopAfter,
-    MachineFunctionInitializer *MFInitializer) {
-  // Add common CodeGen passes.
-  MCContext *Context =
-      addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
-                              StopBefore, StopAfter, MFInitializer);
-  if (!Context)
-    return true;
-
-  if (StopBefore || StopAfter) {
-    PM.add(createPrintMIRPass(Out));
-    return false;
-  }
-
+bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
+    raw_pwrite_stream &Out, CodeGenFileType FileType,
+    MCContext &Context) {
   if (Options.MCOptions.MCSaveTempLabels)
-    Context->setAllowTemporaryLabels(false);
+    Context.setAllowTemporaryLabels(false);
 
   const MCSubtargetInfo &STI = *getMCSubtargetInfo();
   const MCAsmInfo &MAI = *getMCAsmInfo();
@@ -226,14 +135,14 @@ bool LLVMTargetMachine::addPassesToEmitFile(
     // Create a code emitter if asked to show the encoding.
     MCCodeEmitter *MCE = nullptr;
     if (Options.MCOptions.ShowMCEncoding)
-      MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
+      MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
 
     MCAsmBackend *MAB =
         getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
                                        Options.MCOptions);
     auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
     MCStreamer *S = getTarget().createAsmStreamer(
-        *Context, std::move(FOut), Options.MCOptions.AsmVerbose,
+        Context, std::move(FOut), Options.MCOptions.AsmVerbose,
         Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB,
         Options.MCOptions.ShowMCInst);
     AsmStreamer.reset(S);
@@ -242,7 +151,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
   case CGFT_ObjectFile: {
     // Create the code emitter for the target if it exists.  If not, .o file
     // emission fails.
-    MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
+    MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
     MCAsmBackend *MAB =
         getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
                                        Options.MCOptions);
@@ -250,11 +159,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(
       return true;
 
     // Don't waste memory on names of temp labels.
-    Context->setUseNamesOnTempLabels(false);
+    Context.setUseNamesOnTempLabels(false);
 
     Triple T(getTargetTriple().str());
     AsmStreamer.reset(getTarget().createMCObjectStreamer(
-        T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+        T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
         Options.MCOptions.MCIncrementalLinkerCompatible,
         /*DWARFMustBeAtTheEnd*/ true));
     break;
@@ -262,7 +171,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
   case CGFT_Null:
     // The Null output is intended for use for performance analysis and testing,
     // not real users.
-    AsmStreamer.reset(getTarget().createNullStreamer(*Context));
+    AsmStreamer.reset(getTarget().createNullStreamer(Context));
     break;
   }
 
@@ -273,8 +182,28 @@ bool LLVMTargetMachine::addPassesToEmitFile(
     return true;
 
   PM.add(Printer);
-  PM.add(createFreeMachineFunctionPass());
+  return false;
+}
 
+bool LLVMTargetMachine::addPassesToEmitFile(
+    PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
+    bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
+    AnalysisID StopBefore, AnalysisID StopAfter) {
+  // Add common CodeGen passes; a null context means that setup failed.
+  MCContext *Context =
+      addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
+                              StopBefore, StopAfter);
+  if (!Context)
+    return true;
+
+  if (StopBefore || StopAfter) {
+    PM.add(createPrintMIRPass(Out)); // Stop point set: print MIR instead.
+  } else {
+    if (addAsmPrinter(PM, Out, FileType, *Context))
+      return true;
+  }
+
+  PM.add(createFreeMachineFunctionPass()); // Added after either printer.
   return false;
 }
 
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 40ee7ea785f0..995c58a63564 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -14,9 +14,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 0c05dbeacba0..471dcea4bb39 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -14,15 +14,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "LiveRangeCalc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index b4aa0dc326a5..b3248e53d0a5 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -13,10 +13,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <cassert>
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 882de1a3fad9..60033db38ee4 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -11,17 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "RegisterCoalescer.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/CodeGen/LiveIntervalUnion.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Pass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 17cab0ae910e..b109f1922a3e 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
@@ -23,6 +22,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/StackProtector.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index edba749b5fce..3e9513111bf4 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -16,8 +16,8 @@
 #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
 
 #include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include <functional>
 
 namespace llvm {
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index 1d36ff4e1458..f58d1f8b83ae 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -11,11 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MIParser.h"
-
 #include "MILexer.h"
+#include "MIParser.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
 #include "llvm/CodeGen/MIRPrinter.h"
@@ -26,19 +34,48 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
 #include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <utility>
 
 using namespace llvm;
 
@@ -2039,7 +2076,7 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
     // The token was already consumed, so use return here instead of break.
     return false;
   }
-  case MIToken::kw_call_entry: {
+  case MIToken::kw_call_entry:
     lex();
     switch (Token.kind()) {
     case MIToken::GlobalValue:
@@ -2059,7 +2096,6 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
           "expected a global value or an external symbol after 'call-entry'");
     }
     break;
-  }
   default:
     llvm_unreachable("The current token should be pseudo source value");
   }
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
index 9b3879cf8377..2307881068ef 100644
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ b/lib/CodeGen/MIRParser/MIParser.h
@@ -1,4 +1,4 @@
-//===- MIParser.h - Machine Instructions Parser ---------------------------===//
+//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,21 +15,19 @@
 #define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Allocator.h"
 
 namespace llvm {
 
-class StringRef;
-class BasicBlock;
 class MachineBasicBlock;
 class MachineFunction;
-class MachineInstr;
-class MachineRegisterInfo;
 class MDNode;
 class RegisterBank;
 struct SlotMapping;
 class SMDiagnostic;
 class SourceMgr;
+class StringRef;
 class TargetRegisterClass;
 
 struct VRegInfo {
@@ -45,8 +43,8 @@ struct VRegInfo {
   unsigned PreferredReg = 0;
 };
 
-typedef StringMap<const TargetRegisterClass*> Name2RegClassMap;
-typedef StringMap<const RegisterBank*> Name2RegBankMap;
+using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
+using Name2RegBankMap = StringMap<const RegisterBank *>;
 
 struct PerFunctionMIParsingState {
   BumpPtrAllocator Allocator;
@@ -122,4 +120,4 @@ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index ff12297e3fc6..78b57f357781 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -50,18 +50,24 @@ namespace llvm {
 /// file.
 class MIRParserImpl {
   SourceMgr SM;
+  yaml::Input In;
   StringRef Filename;
   LLVMContext &Context;
-  StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
   SlotMapping IRSlots;
   /// Maps from register class names to register classes.
   Name2RegClassMap Names2RegClasses;
   /// Maps from register bank names to register banks.
   Name2RegBankMap Names2RegBanks;
+  /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are
+  /// created and inserted into the given module when this is true.
+  bool NoLLVMIR = false;
+  /// True when a well formed MIR file does not contain any MIR/machine function
+  /// parts.
+  bool NoMIRDocuments = false;
 
 public:
-  MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
-                LLVMContext &Context);
+  MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
+                StringRef Filename, LLVMContext &Context);
 
   void reportDiagnostic(const SMDiagnostic &Diag);
 
@@ -85,22 +91,22 @@ public:
   /// file.
   ///
   /// Return null if an error occurred.
-  std::unique_ptr<Module> parse();
+  std::unique_ptr<Module> parseIRModule();
+
+  bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
 
   /// Parse the machine function in the current YAML document.
   ///
-  /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR.
-  /// A dummy IR function is created and inserted into the given module when
-  /// this parameter is true.
   ///
   /// Return true if an error occurred.
-  bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR);
+  bool parseMachineFunction(Module &M, MachineModuleInfo &MMI);
 
   /// Initialize the machine function to the state that's described in the MIR
   /// file.
   ///
   /// Return true if error occurred.
-  bool initializeMachineFunction(MachineFunction &MF);
+  bool initializeMachineFunction(const yaml::MachineFunction &YamlMF,
+                                 MachineFunction &MF);
 
   bool parseRegisterInfo(PerFunctionMIParsingState &PFS,
                          const yaml::MachineFunction &YamlMF);
@@ -144,9 +150,6 @@ private:
   SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
                                        SMRange SourceRange);
 
-  /// Create an empty function with the given name.
-  void createDummyFunction(StringRef Name, Module &M);
-
   void initNames2RegClasses(const MachineFunction &MF);
   void initNames2RegBanks(const MachineFunction &MF);
 
@@ -166,10 +169,19 @@ private:
 
 } // end namespace llvm
 
+static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
+  reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag);
+}
+
 MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
                              StringRef Filename, LLVMContext &Context)
-    : SM(), Filename(Filename), Context(Context) {
-  SM.AddNewSourceBuffer(std::move(Contents), SMLoc());
+    : SM(), // Must stay declared before In: In's initializer uses SM.
+      In(SM.getMemoryBuffer(
+            SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(),
+            nullptr, handleYAMLDiag, this), // Errors go to reportDiagnostic().
+      Filename(Filename),
+      Context(Context) {
+  In.setContext(&In); // The YAML context pointer is the Input object itself.
 }
 
 bool MIRParserImpl::error(const Twine &Message) {
@@ -206,24 +218,16 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
   Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
 }
 
-static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
-  reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag);
-}
-
-std::unique_ptr<Module> MIRParserImpl::parse() {
-  yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(),
-                 /*Ctxt=*/nullptr, handleYAMLDiag, this);
-  In.setContext(&In);
-
+std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
   if (!In.setCurrentDocument()) {
     if (In.error())
       return nullptr;
     // Create an empty module when the MIR file is empty.
+    NoMIRDocuments = true;
     return llvm::make_unique<Module>(Filename, Context);
   }
 
   std::unique_ptr<Module> M;
-  bool NoLLVMIR = false;
   // Parse the block scalar manually so that we can return unique pointer
   // without having to go trough YAML traits.
   if (const auto *BSN =
@@ -237,49 +241,68 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
     }
     In.nextDocument();
     if (!In.setCurrentDocument())
-      return M;
+      NoMIRDocuments = true;
   } else {
     // Create an new, empty module.
     M = llvm::make_unique<Module>(Filename, Context);
     NoLLVMIR = true;
   }
-
-  // Parse the machine functions.
-  do {
-    if (parseMachineFunction(In, *M, NoLLVMIR))
-      return nullptr;
-    In.nextDocument();
-  } while (In.setCurrentDocument());
-
   return M;
 }
 
-bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M,
-                                         bool NoLLVMIR) {
-  auto MF = llvm::make_unique<yaml::MachineFunction>();
-  yaml::EmptyContext Ctx;
-  yaml::yamlize(In, *MF, false, Ctx);
-  if (In.error())
-    return true;
-  auto FunctionName = MF->Name;
-  if (Functions.find(FunctionName) != Functions.end())
-    return error(Twine("redefinition of machine function '") + FunctionName +
-                 "'");
-  Functions.insert(std::make_pair(FunctionName, std::move(MF)));
-  if (NoLLVMIR)
-    createDummyFunction(FunctionName, M);
-  else if (!M.getFunction(FunctionName))
-    return error(Twine("function '") + FunctionName +
-                 "' isn't defined in the provided LLVM IR");
+bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+  if (NoMIRDocuments)
+    return false; // parseIRModule() saw no machine function documents.
+
+  // Parse the machine functions, one per remaining YAML document.
+  do {
+    if (parseMachineFunction(M, MMI))
+      return true; // Propagate the error.
+    In.nextDocument();
+  } while (In.setCurrentDocument());
+
   return false;
 }
 
-void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
+/// Create a void() function \p Name in \p M with a lone 'unreachable' block.
+static Function *createDummyFunction(StringRef Name, Module &M) {
   auto &Context = M.getContext();
   Function *F = cast<Function>(M.getOrInsertFunction(
       Name, FunctionType::get(Type::getVoidTy(Context), false)));
   BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
   new UnreachableInst(Context, BB);
+  return F;
+}
+
+bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
+  // Parse the current YAML document into YamlMF.
+  yaml::MachineFunction YamlMF;
+  yaml::EmptyContext Ctx;
+  yaml::yamlize(In, YamlMF, false, Ctx);
+  if (In.error())
+    return true;
+
+  // Search for the corresponding IR function (looked up by name in M).
+  StringRef FunctionName = YamlMF.Name;
+  Function *F = M.getFunction(FunctionName);
+  if (!F) {
+    if (NoLLVMIR) { // MIR-only file: fabricate a placeholder IR function.
+      F = createDummyFunction(FunctionName, M);
+    } else {
+      return error(Twine("function '") + FunctionName +
+                   "' isn't defined in the provided LLVM IR");
+    }
+  }
+  if (MMI.getMachineFunction(*F) != nullptr) // Already has one.
+    return error(Twine("redefinition of machine function '") + FunctionName +
+                 "'");
+
+  // Create the MachineFunction and populate it from YamlMF.
+  MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+  if (initializeMachineFunction(YamlMF, MF))
+    return true;
+
+  return false;
 }
 
 static bool isSSA(const MachineFunction &MF) {
@@ -319,15 +342,12 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
     Properties.set(MachineFunctionProperties::Property::NoVRegs);
 }
 
-bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
-  auto It = Functions.find(MF.getName());
-  if (It == Functions.end())
-    return error(Twine("no machine function information for function '") +
-                 MF.getName() + "' in the MIR file");
+bool
+MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
+                                         MachineFunction &MF) {
   // TODO: Recreate the machine function.
   initNames2RegClasses(MF);
   initNames2RegBanks(MF);
-  const yaml::MachineFunction &YamlMF = *It->getValue();
   if (YamlMF.Alignment)
     MF.setAlignment(YamlMF.Alignment);
   MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
@@ -838,16 +858,18 @@ MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
 
 MIRParser::~MIRParser() {}
 
-std::unique_ptr<Module> MIRParser::parseLLVMModule() { return Impl->parse(); }
+std::unique_ptr<Module> MIRParser::parseIRModule() {
+  return Impl->parseIRModule();
+}
 
-bool MIRParser::initializeMachineFunction(MachineFunction &MF) {
-  return Impl->initializeMachineFunction(MF);
+bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
+  return Impl->parseMachineFunctions(M, MMI);
 }
 
 std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename,
                                                          SMDiagnostic &Error,
                                                          LLVMContext &Context) {
-  auto FileOrErr = MemoryBuffer::getFile(Filename);
+  auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
   if (std::error_code EC = FileOrErr.getError()) {
     Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
                          "Could not open input file: " + EC.message());
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index 293fc7358b8e..c524a9835f33 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -12,35 +12,65 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/MIRPrinter.h"
-
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Options.h"
-#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -147,6 +177,7 @@ template <> struct BlockScalarTraits<Module> {
   static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
     Mod.print(OS, nullptr);
   }
+
   static StringRef input(StringRef Str, void *Ctxt, Module &Mod) {
     llvm_unreachable("LLVM Module is supposed to be parsed separately");
     return "";
@@ -210,6 +241,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
   }
   StrOS.flush();
   yaml::Output Out(OS);
+  if (!SimplifyMIR)
+      Out.setWriteDefaultValues(true);
   Out << YamlMF;
 }
 
@@ -516,7 +549,6 @@ bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const {
   return std::equal(MBB.succ_begin(), MBB.succ_end(), GuessedSuccs.begin());
 }
 
-
 void MIPrinter::print(const MachineBasicBlock &MBB) {
   assert(MBB.getNumber() >= 0 && "Invalid MBB number");
   OS << "bb." << MBB.getNumber();
@@ -908,7 +940,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
     OS << "%const." << Op.getIndex();
     printOffset(Op.getOffset());
     break;
-  case MachineOperand::MO_TargetIndex: {
+  case MachineOperand::MO_TargetIndex:
     OS << "target-index(";
     if (const auto *Name = getTargetIndexName(
             *Op.getParent()->getParent()->getParent(), Op.getIndex()))
@@ -918,15 +950,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
     OS << ')';
     printOffset(Op.getOffset());
     break;
-  }
   case MachineOperand::MO_JumpTableIndex:
     OS << "%jump-table." << Op.getIndex();
     break;
-  case MachineOperand::MO_ExternalSymbol:
+  case MachineOperand::MO_ExternalSymbol: {
+    StringRef Name = Op.getSymbolName();
     OS << '$';
-    printLLVMNameWithoutPrefix(OS, Op.getSymbolName());
+    if (Name.empty()) {
+      OS << "\"\"";
+    } else {
+      printLLVMNameWithoutPrefix(OS, Name);
+    }
     printOffset(Op.getOffset());
     break;
+  }
   case MachineOperand::MO_GlobalAddress:
     Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
     printOffset(Op.getOffset());
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 671cf1eddc2d..09354cf70c3c 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -14,9 +14,9 @@
 
 #include "llvm/CodeGen/MIRPrinter.h"
 
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index c1ca8e8e83b4..fc52b0da0d61 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -25,8 +25,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
 #include "BranchFolding.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -41,7 +39,9 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 34f6bbd59e9b..582ff139f886 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/SmallSet.h"
@@ -22,6 +21,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/RecyclingAllocator.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index f83b5481e0a5..7d5a68192e6b 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
@@ -19,6 +18,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
index acb7c4810b16..28ecc8f96805 100644
--- a/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -12,7 +12,6 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
 
-
 using namespace llvm;
 
 namespace llvm {
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index e3a6c51c47ad..65e9e5d195a4 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index ac4ccb81b884..bbdae6e1a49e 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -20,7 +20,6 @@
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionInitializer.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -52,8 +51,6 @@ static cl::opt<unsigned>
                       cl::desc("Force the alignment of all functions."),
                       cl::init(0), cl::Hidden);
 
-void MachineFunctionInitializer::anchor() {}
-
 static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
   typedef MachineFunctionProperties::Property P;
   switch(Prop) {
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 2265676ff8b1..5ffe33006131 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -42,7 +42,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
     return false;
 
   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
-  MachineFunction &MF = MMI.getMachineFunction(F);
+  MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
 
   MachineFunctionProperties &MFProps = MF.getProperties();
 
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0d533c3f4f23..55d9defced3a 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 306b75dbbae7..2a6cb07dbd2d 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -11,20 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -47,8 +47,8 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 95c62d820b0e..52d5819f8dbc 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -16,7 +16,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -26,6 +25,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index c1b72430e605..825290a438a6 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -7,14 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionInitializer.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -259,7 +258,14 @@ void MachineModuleInfo::addPersonality(const Function *Personality) {
 
 /// \}
 
-MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) {
+MachineFunction *
+MachineModuleInfo::getMachineFunction(const Function &F) const {
+  auto I = MachineFunctions.find(&F);
+  return I != MachineFunctions.end() ? I->second.get() : nullptr;
+}
+
+MachineFunction &
+MachineModuleInfo::getOrCreateMachineFunction(const Function &F) {
   // Shortcut for the common case where a sequence of MachineFunctionPasses
   // all query for the same Function.
   if (LastRequest == &F)
@@ -273,10 +279,6 @@ MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) {
     MF = new MachineFunction(&F, TM, NextFnNum++, *this);
     // Update the set entry.
     I.first->second.reset(MF);
-
-    if (MFInitializer)
-      if (MFInitializer->initializeMachineFunction(*MF))
-        report_fatal_error("Unable to initialize machine function");
   } else {
     MF = I.first->second.get();
   }
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index 9ea3c00a2fc4..fd6b2427891d 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -1111,7 +1111,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
   Builder.CreateRetVoid();
 
   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
-  MachineFunction &MF = MMI.getMachineFunction(*F);
+  MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
   MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
   const TargetSubtargetInfo &STI = MF.getSubtarget();
   const TargetInstrInfo &TII = *STI.getInstrInfo();
@@ -1207,7 +1207,7 @@ bool MachineOutliner::runOnModule(Module &M) {
     return false;
 
   MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
-  const TargetSubtargetInfo &STI = MMI.getMachineFunction(*M.begin())
+  const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin())
                                       .getSubtarget();
   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
   const TargetInstrInfo *TII = STI.getInstrInfo();
@@ -1216,7 +1216,7 @@ bool MachineOutliner::runOnModule(Module &M) {
 
   // Build instruction mappings for each function in the module.
   for (Function &F : M) {
-    MachineFunction &MF = MMI.getMachineFunction(F);
+    MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
 
     // Is the function empty? Safe to outline from?
     if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF))
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 8f5ac8b3fc45..19e9a50e2c43 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -61,7 +61,6 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/SetVector.h"
@@ -69,6 +68,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ValueTracking.h"
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index 2402ffdbbcb1..1e74104e89ed 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineRegionInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/RegionInfoImpl.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
-#include "llvm/CodeGen/MachineRegionInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 128910f8eb2a..9a92ee279cdc 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -18,7 +19,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index edc3783afa2f..01a2286b8d66 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -12,13 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/PriorityQueue.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -30,12 +31,11 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePassRegistry.h"
-#include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 7c34e71a0cce..79e3fea3f90c 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -16,7 +16,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SparseBitVector.h"
@@ -33,6 +32,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 01391a1a0e50..6c5abc66fba1 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
@@ -21,7 +22,6 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index f6dbf667cf02..e65c256c1bb5 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,7 +23,6 @@
 // the verifier errors.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
@@ -36,6 +35,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/InlineAsm.h"
@@ -945,7 +945,6 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
     VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
 
     // TODO: verify we have properly encoded deopt arguments
-   
   };
 }
 
@@ -1947,9 +1946,11 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
       SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
       const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
 
-      // All predecessors must have a live-out value if this is not a
-      // subregister liverange.
-      if (!PVNI && LaneMask.none()) {
+      // All predecessors must have a live-out value. However, for a PHI
+      // instruction with subregister intervals, only one of the
+      // subregisters (not necessarily the current one) needs to be
+      // defined.
+      if (!PVNI && (LaneMask.none() || !IsPHI) ) {
         report("Register not marked live out of predecessor", *PI);
         report_context(LR, Reg, LaneMask);
         report_context(*VNI);
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 76ad668104b4..f7aeb4204c5b 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index 00e72971a01e..513e82716564 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -12,10 +12,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 6d643457e9a9..da8fac6d3834 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -66,7 +66,6 @@
 //     C = copy A    <-- same-bank copy
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -79,6 +78,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index 5bc5f7524dbf..425a59dc0375 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -27,9 +27,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index fb49a934431c..a7b7a9f8ab15 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -21,13 +21,12 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 24be7ea98d82..774306154a89 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "AllocationOrder.h"
 #include "LiveDebugVariables.h"
 #include "RegAllocBase.h"
@@ -28,6 +27,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/PassAnalysisSupport.h"
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 47d726f6da7a..50d241bff23d 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -19,36 +19,63 @@
 #include "SpillPlacement.h"
 #include "Spiller.h"
 #include "SplitKit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/PassAnalysisSupport.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
 #include <queue>
+#include <tuple>
+#include <utility>
 
 using namespace llvm;
 
@@ -106,13 +133,14 @@ static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                        createGreedyRegisterAllocator);
 
 namespace {
+
 class RAGreedy : public MachineFunctionPass,
                  public RegAllocBase,
                  private LiveRangeEdit::Delegate {
   // Convenient shortcuts.
-  typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
-  typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
-  typedef SmallSet<unsigned, 16> SmallVirtRegSet;
+  using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
+  using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
+  using SmallVirtRegSet = SmallSet<unsigned, 16>;
 
   // context
   MachineFunction *MF;
@@ -201,12 +229,12 @@ class RAGreedy : public MachineFunctionPass,
 
   // RegInfo - Keep additional information about each live range.
   struct RegInfo {
-    LiveRangeStage Stage;
+    LiveRangeStage Stage = RS_New;
 
     // Cascade - Eviction loop prevention. See canEvictInterference().
-    unsigned Cascade;
+    unsigned Cascade = 0;
 
-    RegInfo() : Stage(RS_New), Cascade(0) {}
+    RegInfo() = default;
   };
 
   IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
@@ -232,10 +260,10 @@ class RAGreedy : public MachineFunctionPass,
 
   /// Cost of evicting interference.
   struct EvictionCost {
-    unsigned BrokenHints; ///< Total number of broken hints.
-    float MaxWeight;      ///< Maximum spill weight evicted.
+    unsigned BrokenHints = 0; ///< Total number of broken hints.
+    float MaxWeight = 0;      ///< Maximum spill weight evicted.
 
-    EvictionCost(): BrokenHints(0), MaxWeight(0) {}
+    EvictionCost() = default;
 
     bool isMax() const { return BrokenHints == ~0u; }
 
@@ -413,10 +441,12 @@ private:
     /// Its currently assigned register.
     /// In case of a physical register Reg == PhysReg.
     unsigned PhysReg;
+
     HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
         : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
   };
-  typedef SmallVector<HintInfo, 4> HintsInfo;
+  using HintsInfo = SmallVector<HintInfo, 4>;
+
   BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
   void collectHintInfo(unsigned, HintsInfo &);
 
@@ -436,6 +466,7 @@ private:
     }
   }
 };
+
 } // end anonymous namespace
 
 char RAGreedy::ID = 0;
@@ -475,7 +506,6 @@ const char *const RAGreedy::StageName[] = {
 // This helps stabilize decisions based on float comparisons.
 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
 
-
 FunctionPass* llvm::createGreedyRegisterAllocator() {
   return new RAGreedy();
 }
@@ -511,7 +541,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-
 //===----------------------------------------------------------------------===//
 //                     LiveRangeEdit delegate methods
 //===----------------------------------------------------------------------===//
@@ -634,7 +663,6 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
   return LI;
 }
 
-
 //===----------------------------------------------------------------------===//
 //                            Direct Assignment
 //===----------------------------------------------------------------------===//
@@ -682,7 +710,6 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   return CheapReg ? CheapReg : PhysReg;
 }
 
-
 //===----------------------------------------------------------------------===//
 //                         Interference eviction
 //===----------------------------------------------------------------------===//
@@ -954,7 +981,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
   return BestPhys;
 }
 
-
 //===----------------------------------------------------------------------===//
 //                              Region Splitting
 //===----------------------------------------------------------------------===//
@@ -1025,7 +1051,6 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
   return SpillPlacer->scanActiveBundles();
 }
 
-
 /// addThroughConstraints - Add constraints and links to SpillPlacer from the
 /// live-through blocks in Blocks.
 void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
@@ -1083,7 +1108,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
   unsigned Visited = 0;
 #endif
 
-  for (;;) {
+  while (true) {
     ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
     // Find new through blocks in the periphery of PrefRegBundles.
     for (int i = 0, e = NewBundles.size(); i != e; ++i) {
@@ -1197,8 +1222,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
   for (unsigned i = 0; i != UseBlocks.size(); ++i) {
     const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
     SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
-    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
-    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, false)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
     unsigned Ins = 0;
 
     if (BI.LiveIn)
@@ -1211,8 +1236,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
 
   for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
     unsigned Number = Cand.ActiveBlocks[i];
-    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
-    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+    bool RegIn  = LiveBundles[Bundles->getBundle(Number, false)];
+    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
     if (!RegIn && !RegOut)
       continue;
     if (RegIn && RegOut) {
@@ -1264,7 +1289,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
     unsigned IntvIn = 0, IntvOut = 0;
     SlotIndex IntfIn, IntfOut;
     if (BI.LiveIn) {
-      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
       if (CandIn != NoCand) {
         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
         IntvIn = Cand.IntvIdx;
@@ -1273,7 +1298,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
       }
     }
     if (BI.LiveOut) {
-      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
       if (CandOut != NoCand) {
         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
         IntvOut = Cand.IntvIdx;
@@ -1313,7 +1338,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
       unsigned IntvIn = 0, IntvOut = 0;
       SlotIndex IntfIn, IntfOut;
 
-      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
       if (CandIn != NoCand) {
         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
         IntvIn = Cand.IntvIdx;
@@ -1321,7 +1346,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
         IntfIn = Cand.Intf.first();
       }
 
-      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
       if (CandOut != NoCand) {
         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
         IntvOut = Cand.IntvIdx;
@@ -1533,7 +1558,6 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
   return 0;
 }
 
-
 //===----------------------------------------------------------------------===//
 //                            Per-Block Splitting
 //===----------------------------------------------------------------------===//
@@ -1580,7 +1604,6 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   return 0;
 }
 
-
 //===----------------------------------------------------------------------===//
 //                         Per-Instruction Splitting
 //===----------------------------------------------------------------------===//
@@ -1664,12 +1687,10 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
   return 0;
 }
 
-
 //===----------------------------------------------------------------------===//
 //                             Local Splitting
 //===----------------------------------------------------------------------===//
 
-
 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted
 /// in order to use PhysReg between two entries in SA->UseSlots.
 ///
@@ -1740,7 +1761,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
         break;
 
       for (; Gap != NumGaps; ++Gap) {
-        GapWeight[Gap] = llvm::huge_valf;
+        GapWeight[Gap] = huge_valf;
         if (Uses[Gap+1].getBaseIndex() >= I->end)
           break;
       }
@@ -1846,7 +1867,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     // Remove any gaps with regmask clobbers.
     if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
       for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
-        GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
+        GapWeight[RegMaskGaps[i]] = huge_valf;
 
     // Try to find the best sequence of gaps to close.
     // The new spill weight must be larger than any gap interference.
@@ -1858,7 +1879,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
     // It is the spill weight that needs to be evicted.
     float MaxGap = GapWeight[0];
 
-    for (;;) {
+    while (true) {
       // Live before/after split?
       const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
       const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
@@ -1881,7 +1902,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
       // Legally, without causing looping?
       bool Legal = !ProgressRequired || NewGaps < NumGaps;
 
-      if (Legal && MaxGap < llvm::huge_valf) {
+      if (Legal && MaxGap < huge_valf) {
         // Estimate the new spill weight. Each instruction reads or writes the
         // register. Conservatively assume there are no read-modify-write
         // instructions.
@@ -2680,6 +2701,7 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads,
 
   if (Reloads || FoldedReloads || Spills || FoldedSpills) {
     using namespace ore;
+
     MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
                                       L->getStartLoc(), L->getHeader());
     if (Spills)
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index b2dfef91add5..e3baff4be4bc 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -29,15 +29,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/RegAllocPBQP.h"
 #include "RegisterCoalescer.h"
 #include "Spiller.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
@@ -56,7 +57,6 @@
 #include "llvm/CodeGen/PBQP/Math.h"
 #include "llvm/CodeGen/PBQP/Solution.h"
 #include "llvm/CodeGen/PBQPRAConstraint.h"
-#include "llvm/CodeGen/RegAllocPBQP.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/VirtRegMap.h"
@@ -84,8 +84,8 @@
 #include <string>
 #include <system_error>
 #include <tuple>
-#include <vector>
 #include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -738,7 +738,15 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
 
     if (PReg == 0) {
       const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
-      PReg = RC.getRawAllocationOrder(MF).front();
+      const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
+      for (unsigned CandidateReg : RawPRegOrder) {
+        if (!VRM.getRegInfo().isReserved(CandidateReg)) {
+          PReg = CandidateReg;
+          break;
+        }
+      }
+      assert(PReg &&
+             "No un-reserved physical registers in this register class");
     }
 
     VRM.assignVirt2Phys(LI.reg, PReg);
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 82a3bd9a0bd1..956dec39fc38 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -14,12 +14,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index c726edc88b41..88e0a3b58940 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -12,9 +12,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -24,7 +25,6 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index 66f196678dea..d7a3ac080823 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -12,11 +12,22 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/RegisterUsageInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -63,7 +74,7 @@ PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) {
 void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
   const TargetRegisterInfo *TRI;
 
-  typedef std::pair<const Function *, std::vector<uint32_t>> FuncPtrRegMaskPair;
+  using FuncPtrRegMaskPair = std::pair<const Function *, std::vector<uint32_t>>;
 
   SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
 
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index cc32e43968bb..d2eff950d861 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -32,10 +32,10 @@
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 
 using namespace llvm;
 
@@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
     const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
     const SmallVectorImpl<LiveInterval*> &Intervals) const {
   const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
-  unsigned Reg = Intervals[0]->reg;;
+  unsigned Reg = Intervals[0]->reg;
   for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
        E = MRI->reg_nodbg_end(); I != E; ) {
     MachineOperand &MO = *I++;
@@ -243,6 +243,11 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
 
     unsigned VReg = Intervals[ID]->reg;
     MO.setReg(VReg);
+    if (MO.isTied()) {
+      /// Undef use operands are not tracked in the equivalence class but need
+      /// to be updated if they are tied.
+      MO.getParent()->substituteRegister(Reg, VReg, 0, TRI);
+    }
   }
   // TODO: We could attempt to recompute new register classes while visiting
   // the operands: Some of the split register may be fine with less constraint
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index 3e259927ac5c..01b3db43b283 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -14,9 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index dc72ac073258..3cd270cec3a6 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -12,11 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 3fdbd2459361..7dd66d799be4 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,19 +12,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -33,7 +34,6 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/IR/Constants.h"
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index ca2881cb91e0..bb6a45996f63 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index fb51a4eb1421..a0967f574006 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1028,13 +1028,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
   switch (Opc) {
   default: break;
   case ISD::AssertSext:
-    return DAG.getNode(ISD::AssertSext, DL, PVT,
-                       SExtPromoteOperand(Op.getOperand(0), PVT),
-                       Op.getOperand(1));
+    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
+      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
+    break;
   case ISD::AssertZext:
-    return DAG.getNode(ISD::AssertZext, DL, PVT,
-                       ZExtPromoteOperand(Op.getOperand(0), PVT),
-                       Op.getOperand(1));
+    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
+      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
+    break;
   case ISD::Constant: {
     unsigned ExtOpc =
       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
@@ -1563,7 +1563,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
 
   // If N is a commutative binary node, try commuting it to enable more
   // sdisel CSE.
-  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
       N->getNumValues() == 1) {
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
@@ -12488,12 +12488,18 @@ void DAGCombiner::getStoreMergeCandidates(
   if (BasePtr.Base.isUndef())
     return;
 
-  bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
   bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
                        isa<ConstantFPSDNode>(St->getValue());
   bool IsExtractVecSrc =
       (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
        St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
+  bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+  BaseIndexOffset LBasePtr;
+  // Match on loadbaseptr if relevant.
+  if (IsLoadSrc)
+    LBasePtr = BaseIndexOffset::match(
+        cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
+
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
     if (Other->isVolatile() || Other->isIndexed())
       return false;
@@ -12502,9 +12508,15 @@ void DAGCombiner::getStoreMergeCandidates(
       if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
             isa<ConstantFPSDNode>(Other->getValue())))
         return false;
-    if (IsLoadSrc)
-      if (!isa<LoadSDNode>(Other->getValue()))
+    if (IsLoadSrc) {
+      // The Load's Base Ptr must also match
+      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
+        auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+        if (!(LBasePtr.equalBaseIndex(LPtr)))
+          return false;
+      } else
         return false;
+    }
     if (IsConstantSrc)
       if (!(isa<ConstantSDNode>(Other->getValue()) ||
             isa<ConstantFPSDNode>(Other->getValue())))
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 5003b79974eb..b2599b2e17f1 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,6 +39,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/FastISel.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/DenseMap.h"
@@ -50,7 +51,6 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index da2fb72bec45..e54eaa3b81be 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4598,6 +4598,14 @@ void SelectionDAG::Legalize() {
   AssignTopologicalOrder();
 
   SmallPtrSet<SDNode *, 16> LegalizedNodes;
+  // Use a delete listener to remove nodes which were deleted during
+  // legalization from LegalizedNodes. This is needed to handle the situation
+  // where a new node is allocated by the object pool to the same address of a
+  // previously deleted node.
+  DAGNodeDeletedListener DeleteListener(
+      *this,
+      [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); });
+
   SelectionDAGLegalize Legalizer(*this, LegalizedNodes);
 
   // Visit all the nodes. We start in topological order, so that we see
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index d80a281279b6..137994093277 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "InstrEmitter.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 579112c9bfc8..593efc5121f9 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -15,13 +15,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index eee4a4b06718..631cb34717c4 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -18,12 +18,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/ResourcePriorityQueue.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 80a03ea4eea0..dff8bd2ad37d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "SDNodeDbgValue.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
@@ -19,9 +20,9 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -33,7 +34,6 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -589,6 +589,11 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
   // worklist.
   while (!DeadNodes.empty()) {
     SDNode *N = DeadNodes.pop_back_val();
+    // Skip to next node if we've already managed to delete the node. This could
+    // happen if replacing a node causes a node previously added to the worklist
+    // to be deleted.
+    if (N->getOpcode() == ISD::DELETED_NODE)
+      continue;
 
     for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
       DUL->NodeDeleted(N, nullptr);
@@ -2661,7 +2666,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       if (DemandedElts[EltIdx]) {
         computeKnownBits(InVal, Known2, Depth + 1);
         Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
-        Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());;
+        Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
       }
 
       // If we demand the source vector then add its common known bits, ensuring
@@ -2677,7 +2682,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
       computeKnownBits(InVec, Known, Depth + 1);
       computeKnownBits(InVal, Known2, Depth + 1);
       Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth());
-      Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());;
+      Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth());
     }
     break;
   }
@@ -3883,7 +3888,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   // fold (add Sym, c) -> Sym+c
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
     return FoldSymbolOffset(Opcode, VT, GA, Cst2);
-  if (isCommutativeBinOp(Opcode))
+  if (TLI->isCommutativeBinOp(Opcode))
     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
       return FoldSymbolOffset(Opcode, VT, GA, Cst1);
 
@@ -4029,7 +4034,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
 
   // Canonicalize constant to RHS if commutative.
-  if (isCommutativeBinOp(Opcode)) {
+  if (TLI->isCommutativeBinOp(Opcode)) {
     if (N1C && !N2C) {
       std::swap(N1C, N2C);
       std::swap(N1, N2);
@@ -4413,7 +4418,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
 
   // Canonicalize an UNDEF to the RHS, even over a constant.
   if (N1.isUndef()) {
-    if (isCommutativeBinOp(Opcode)) {
+    if (TLI->isCommutativeBinOp(Opcode)) {
       std::swap(N1, N2);
     } else {
       switch (Opcode) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index b895da21a7ff..d34ac40b9496 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -101,7 +101,8 @@ static const unsigned MaxParallelChains = 64;
 
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V);
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy);
 
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent.  If the parts combine to a type
@@ -111,10 +112,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
 static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                 const SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, EVT ValueVT, const Value *V,
-                                Optional<ISD::NodeType> AssertOp = None) {
+                                Optional<ISD::NodeType> AssertOp = None,
+                                bool IsABIRegCopy = false) {
   if (ValueVT.isVector())
     return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
-                                  PartVT, ValueVT, V);
+                                  PartVT, ValueVT, V, IsABIRegCopy);
 
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -258,7 +260,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
 /// ValueVT (ISD::AssertSext).
 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                       const SDValue *Parts, unsigned NumParts,
-                                      MVT PartVT, EVT ValueVT, const Value *V) {
+                                      MVT PartVT, EVT ValueVT, const Value *V,
+                                      bool IsABIRegCopy) {
   assert(ValueVT.isVector() && "Not a vector value");
   assert(NumParts > 0 && "No parts to assemble!");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -269,9 +272,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
     EVT IntermediateVT;
     MVT RegisterVT;
     unsigned NumIntermediates;
-    unsigned NumRegs =
-    TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
-                               NumIntermediates, RegisterVT);
+    unsigned NumRegs;
+
+    if (IsABIRegCopy) {
+      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+          RegisterVT);
+    } else {
+      NumRegs =
+          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                     NumIntermediates, RegisterVT);
+    }
+
     assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
     NumParts = NumRegs; // Silence a compiler warning.
     assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
@@ -300,9 +312,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
 
     // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
     // intermediate operands.
+    EVT BuiltVectorTy =
+        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+                         (IntermediateVT.isVector()
+                              ? IntermediateVT.getVectorNumElements() * NumParts
+                              : NumIntermediates));
     Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                 : ISD::BUILD_VECTOR,
-                      DL, ValueVT, Ops);
+                      DL, BuiltVectorTy, Ops);
   }
 
   // There is now one part, held in Val.  Correct it to match ValueVT.
@@ -341,13 +358,29 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
       TLI.isTypeLegal(ValueVT))
     return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
 
-  // Handle cases such as i8 -> <1 x i1>
   if (ValueVT.getVectorNumElements() != 1) {
-    diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
-                                      "non-trivial scalar-to-vector conversion");
-    return DAG.getUNDEF(ValueVT);
+     // Certain ABIs require that vectors are passed as integers. For vectors
+     // that are the same size, this is an obvious bitcast.
+     if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+       return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+     } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+       // Bitcast Val back to the original type and extract the corresponding
+       // vector we want.
+       unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
+       EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
+                                           ValueVT.getVectorElementType(), Elts);
+       Val = DAG.getBitcast(WiderVecType, Val);
+       return DAG.getNode(
+           ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+           DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+     }
+
+     diagnosePossiblyInvalidConstraint(
+         *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
+     return DAG.getUNDEF(ValueVT);
   }
 
+  // Handle cases such as i8 -> <1 x i1>
   EVT ValueSVT = ValueVT.getVectorElementType();
   if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
     Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
@@ -358,7 +391,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
 
 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V);
+                                 MVT PartVT, const Value *V, bool IsABIRegCopy);
 
 /// getCopyToParts - Create a series of nodes that contain the specified value
 /// split into legal parts.  If the parts contain more bits than Val, then, for
@@ -366,12 +399,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                            SDValue *Parts, unsigned NumParts, MVT PartVT,
                            const Value *V,
-                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
+                           bool IsABIRegCopy = false) {
   EVT ValueVT = Val.getValueType();
 
   // Handle the vector case separately.
   if (ValueVT.isVector())
-    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+                                IsABIRegCopy);
 
   unsigned PartBits = PartVT.getSizeInBits();
   unsigned OrigNumParts = NumParts;
@@ -496,7 +531,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
 /// value split into legal parts.
 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 MVT PartVT, const Value *V) {
+                                 MVT PartVT, const Value *V,
+                                 bool IsABIRegCopy) {
+
   EVT ValueVT = Val.getValueType();
   assert(ValueVT.isVector() && "Not a vector");
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -537,13 +574,20 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
 
       // Promoted vector extract
       Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
-    } else{
-      // Vector -> scalar conversion.
-      assert(ValueVT.getVectorNumElements() == 1 &&
-             "Only trivial vector-to-scalar conversions should get here!");
-      Val = DAG.getNode(
-          ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
-          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+    } else {
+      if (ValueVT.getVectorNumElements() == 1) {
+        Val = DAG.getNode(
+            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+      } else {
+        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+               "lossy conversion of vector to scalar type");
+        EVT IntermediateType =
+            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+        Val = DAG.getBitcast(IntermediateType, Val);
+        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+      }
     }
 
     assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
@@ -555,15 +599,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
   EVT IntermediateVT;
   MVT RegisterVT;
   unsigned NumIntermediates;
-  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
-                                                IntermediateVT,
-                                                NumIntermediates, RegisterVT);
+  unsigned NumRegs;
+  if (IsABIRegCopy) {
+    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+        RegisterVT);
+  } else {
+    NumRegs =
+        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+                                   NumIntermediates, RegisterVT);
+  }
   unsigned NumElements = ValueVT.getVectorNumElements();
 
   assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
   NumParts = NumRegs; // Silence a compiler warning.
   assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
 
+  // Convert the vector to the appropriate type if necessary.
+  unsigned DestVectorNoElts =
+      NumIntermediates *
+      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+  EVT BuiltVectorTy = EVT::getVectorVT(
+      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+  if (Val.getValueType() != BuiltVectorTy)
+    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+
   // Split the vector into intermediate operands.
   SmallVector<SDValue, 8> Ops(NumIntermediates);
   for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -596,22 +656,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
   }
 }
 
-RegsForValue::RegsForValue() {}
+RegsForValue::RegsForValue() { IsABIMangled = false; }
 
 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
-                           EVT valuevt)
-    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+                           EVT valuevt, bool IsABIMangledValue)
+    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
 
 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-                           const DataLayout &DL, unsigned Reg, Type *Ty) {
+                           const DataLayout &DL, unsigned Reg, Type *Ty,
+                           bool IsABIMangledValue) {
   ComputeValueVTs(TLI, DL, Ty, ValueVTs);
 
+  IsABIMangled = IsABIMangledValue;
+
   for (EVT ValueVT : ValueVTs) {
-    unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
-    MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+    unsigned NumRegs = IsABIMangledValue
+                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
+                           : TLI.getNumRegisters(Context, ValueVT);
+    MVT RegisterVT = IsABIMangledValue
+                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
+                         : TLI.getRegisterType(Context, ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i)
       Regs.push_back(Reg + i);
     RegVTs.push_back(RegisterVT);
+    RegCount.push_back(NumRegs);
     Reg += NumRegs;
   }
 }
@@ -632,8 +701,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
     // Copy the legal parts from the registers.
     EVT ValueVT = ValueVTs[Value];
-    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumRegs = RegCount[Value];
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];
 
     Parts.resize(NumRegs);
     for (unsigned i = 0; i != NumRegs; ++i) {
@@ -728,9 +799,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
   unsigned NumRegs = Regs.size();
   SmallVector<SDValue, 8> Parts(NumRegs);
   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    EVT ValueVT = ValueVTs[Value];
-    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
-    MVT RegisterVT = RegVTs[Value];
+    unsigned NumParts = RegCount[Value];
+
+    MVT RegisterVT = IsABIMangled
+                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+                         : RegVTs[Value];
 
     if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
       ExtendKind = ISD::ZERO_EXTEND;
@@ -953,10 +1026,16 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
 
   if (It != FuncInfo.ValueMap.end()) {
     unsigned InReg = It->second;
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
     RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                     DAG.getDataLayout(), InReg, Ty);
+                     DAG.getDataLayout(), InReg, Ty, IsABIRegCopy);
     SDValue Chain = DAG.getEntryNode();
-    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+                                 V);
     resolveDanglingDebugInfo(V, Result);
   }
 
@@ -1142,8 +1221,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
   // If this is an instruction which fast-isel has deferred, select it now.
   if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
     unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+    bool IsABIRegCopy =
+        V && ((isa<CallInst>(V) &&
+               !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+              isa<ReturnInst>(V));
+
     RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
-                     Inst->getType());
+                     Inst->getType(), IsABIRegCopy);
     SDValue Chain = DAG.getEntryNode();
     return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
   }
@@ -1371,12 +1455,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
           VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
 
-        unsigned NumParts = TLI.getNumRegisters(Context, VT);
-        MVT PartVT = TLI.getRegisterType(Context, VT);
+        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
+        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
         SmallVector<SDValue, 4> Parts(NumParts);
         getCopyToParts(DAG, getCurSDLoc(),
                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
-                       &Parts[0], NumParts, PartVT, &I, ExtendKind);
+                       &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
 
         // 'inreg' on function refers to return value
         ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -5998,20 +6082,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   }
 }
 
-/// Return true if it only matters that the value is equal or not-equal to zero.
-static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
-  for (const User *U : V->users()) {
-    if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
-      if (IC->isEquality())
-        if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
-          if (C->isNullValue())
-            continue;
-    // Unknown instruction.
-    return false;
-  }
-  return true;
-}
-
 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
                              SelectionDAGBuilder &Builder) {
 
@@ -6098,7 +6168,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
 
   // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
   // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
-  if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I))
+  if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
     return false;
 
   // If the target has a fast compare for the given size, it will return a
@@ -7126,8 +7196,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
 
           SDLoc dl = getCurSDLoc();
           // Use the produced MatchedRegs object to
-          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
-                                    Chain, &Flag, CS.getInstruction());
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+                                    CS.getInstruction());
           MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                            true, OpInfo.getMatchedOperand(), dl,
                                            DAG, AsmNodeOperands);
@@ -7813,8 +7883,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
   } else {
     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
       EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags;
         MyFlags.VT = RegisterVT;
@@ -7863,7 +7935,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       SDValue Op = SDValue(Args[i].Node.getNode(),
                            Args[i].Node.getResNo() + Value);
       ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
 
       if (Args[i].IsZExt)
         Flags.setZExt();
@@ -7918,8 +7994,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         Flags.setInConsecutiveRegs();
       Flags.setOrigAlign(OriginalAlignment);
 
-      MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumParts =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
       SmallVector<SDValue, 4> Parts(NumParts);
       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
@@ -7949,7 +8026,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       }
 
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
-                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
+                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
+                     true);
 
       for (unsigned j = 0; j != NumParts; ++j) {
         // if it isn't first piece, alignment must be 1
@@ -8049,12 +8127,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     unsigned CurReg = 0;
     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
       EVT VT = RetTys[I];
-      MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-      unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+      MVT RegisterVT =
+          getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+      unsigned NumRegs =
+          getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
 
       ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
                                               NumRegs, RegisterVT, VT, nullptr,
-                                              AssertOp));
+                                              AssertOp, true));
       CurReg += NumRegs;
     }
 
@@ -8090,8 +8170,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
   assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // If this is an InlineAsm we have to match the registers required, not the
+  // notional registers required by the type.
+  bool IsABIRegCopy =
+    V && ((isa<CallInst>(V) &&
+           !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+          isa<ReturnInst>(V));
+
   RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
-                   V->getType());
+                   V->getType(), IsABIRegCopy);
   SDValue Chain = DAG.getEntryNode();
 
   ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -8333,7 +8420,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       EVT VT = ValueVTs[Value];
       Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
       ISD::ArgFlagsTy Flags;
-      unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+      // Certain targets (such as MIPS), may have a different ABI alignment
+      // for a type depending on the context. Give the target a chance to
+      // specify the alignment it wants.
+      unsigned OriginalAlignment =
+          TLI->getABIAlignmentForCallingConv(ArgTy, DL);
 
       if (Arg.hasAttribute(Attribute::ZExt))
         Flags.setZExt();
@@ -8395,8 +8487,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       if (ArgCopyElisionCandidates.count(&Arg))
         Flags.setCopyElisionCandidate();
 
-      MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT RegisterVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumRegs =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
       for (unsigned i = 0; i != NumRegs; ++i) {
         ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
                               ArgNo, PartBase+i*RegisterVT.getStoreSize());
@@ -8500,8 +8594,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
 
     for (unsigned Val = 0; Val != NumValues; ++Val) {
       EVT VT = ValueVTs[Val];
-      MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-      unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+      MVT PartVT =
+          TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+      unsigned NumParts =
+          TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
 
       // Even an apparant 'unused' swifterror argument needs to be returned. So
       // we do generate a copy for it that can be used on return from the
@@ -8514,7 +8610,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
           AssertOp = ISD::AssertZext;
 
         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
-                                             PartVT, VT, nullptr, AssertOp));
+                                             PartVT, VT, nullptr, AssertOp,
+                                             true));
       }
 
       i += NumParts;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 77e131fa551c..431d52b4b9b9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -975,18 +975,28 @@ struct RegsForValue {
   /// expanded value requires multiple registers.
   SmallVector<unsigned, 4> Regs;
 
+  /// This list holds the number of registers for each value.
+  SmallVector<unsigned, 4> RegCount;
+
+  /// Records if this value needs to be treated in an ABI-dependent manner,
+  /// different to normal type legalization.
+  bool IsABIMangled;
+
   RegsForValue();
 
-  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+  RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+               bool IsABIMangledValue = false);
 
   RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
-               const DataLayout &DL, unsigned Reg, Type *Ty);
+               const DataLayout &DL, unsigned Reg, Type *Ty,
+               bool IsABIMangledValue = false);
 
   /// Add the specified values to this one.
   void append(const RegsForValue &RHS) {
     ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
     RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
     Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    RegCount.push_back(RHS.Regs.size());
   }
 
   /// Emit a series of CopyFromReg nodes that copies from this value and returns
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 0dbd9e846aa6..3dd58975b1f1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b5ccd64ee76c..b67f11f85b70 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,11 +17,11 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
@@ -31,6 +31,7 @@
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -38,7 +39,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachinePassRegistry.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -51,9 +51,11 @@
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstrTypes.h"
@@ -64,6 +66,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
@@ -89,6 +92,7 @@
 #include <cassert>
 #include <cstdint>
 #include <iterator>
+#include <limits>
 #include <memory>
 #include <string>
 #include <utility>
@@ -333,11 +337,12 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
 /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
 /// may trap on it.  In this case we have to split the edge so that the path
 /// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction.
-///
+/// execute the possibly trapping instruction. If available, we pass a
+/// dominator tree to be updated when we split critical edges. This is because
+/// SelectionDAGISel preserves the DominatorTree.
 /// This is required for correctness, so it must be done at -O0.
 ///
-static void SplitCriticalSideEffectEdges(Function &Fn) {
+static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT) {
   // Loop for blocks with phi nodes.
   for (BasicBlock &BB : Fn) {
     PHINode *PN = dyn_cast<PHINode>(BB.begin());
@@ -363,7 +368,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn) {
         // Okay, we have to split this edge.
         SplitCriticalEdge(
             Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
-            CriticalEdgeSplittingOptions().setMergeIdenticalEdges());
+            CriticalEdgeSplittingOptions(DT).setMergeIdenticalEdges());
         goto ReprocessBlock;
       }
   }
@@ -399,10 +404,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
   ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
+  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
 
   DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
 
-  SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
+  SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT);
 
   CurDAG->init(*MF, *ORE);
   FuncInfo->set(Fn, *MF, CurDAG);
@@ -763,7 +770,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
 
     DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
           << " '" << BlockName << "'\n"; CurDAG->dump());
-
   }
 
   {
@@ -1134,7 +1140,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
       // Check if the variable is a static alloca or a byval or inalloca
       // argument passed in memory. If it is not, then we will ignore this
       // intrinsic and handle this during isel like dbg.value.
-      int FI = INT_MAX;
+      int FI = std::numeric_limits<int>::max();
       if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
         auto SI = FuncInfo->StaticAllocaMap.find(AI);
         if (SI != FuncInfo->StaticAllocaMap.end())
@@ -1142,7 +1148,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
       } else if (const auto *Arg = dyn_cast<Argument>(Address))
         FI = FuncInfo->getArgumentFrameIndex(Arg);
 
-      if (FI == INT_MAX)
+      if (FI == std::numeric_limits<int>::max())
         continue;
 
       DIExpression *Expr = DI->getExpression();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 2764688518c2..11561dfa5947 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index eed667dbe7e0..5d78bba86d73 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,9 +17,9 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/CallingConv.h"
@@ -840,7 +840,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
       //       completely and make statepoint call to return a tuple.
       unsigned Reg = FuncInfo.CreateRegs(RetTy);
       RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
-                       DAG.getDataLayout(), Reg, RetTy);
+                       DAG.getDataLayout(), Reg, RetTy, true);
       SDValue Chain = DAG.getEntryNode();
 
       RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index adb2b188265b..cfda0fffd031 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2166,7 +2166,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
         if (N0.getOperand(1) == N1.getOperand(1))
           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
-        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+        if (isCommutativeBinOp(N0.getOpcode())) {
           // If X op Y == Y op X, try other combinations.
           if (N0.getOperand(0) == N1.getOperand(1))
             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
@@ -2230,7 +2230,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           return DAG.getSetCC(dl, VT, N0.getOperand(1),
                               DAG.getConstant(0, dl, N0.getValueType()), Cond);
         if (N0.getOperand(1) == N1) {
-          if (DAG.isCommutativeBinOp(N0.getOpcode()))
+          if (isCommutativeBinOp(N0.getOpcode()))
             return DAG.getSetCC(dl, VT, N0.getOperand(0),
                                 DAG.getConstant(0, dl, N0.getValueType()),
                                 Cond);
@@ -2257,7 +2257,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         return DAG.getSetCC(dl, VT, N1.getOperand(1),
                         DAG.getConstant(0, dl, N1.getValueType()), Cond);
       if (N1.getOperand(1) == N0) {
-        if (DAG.isCommutativeBinOp(N1.getOpcode()))
+        if (isCommutativeBinOp(N1.getOpcode()))
           return DAG.getSetCC(dl, VT, N1.getOperand(0),
                           DAG.getConstant(0, dl, N1.getValueType()), Cond);
         if (N1.getNode()->hasOneUse()) {
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 6750fde57638..7b60d22c7ace 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -16,9 +16,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 09e9c3bb3354..7886737b879c 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -12,11 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 916b6f08c1b9..b4fa29d9a86b 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
@@ -15,7 +16,6 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index ca8bde2d114a..d8e7840a2576 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
@@ -28,6 +29,7 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
@@ -58,6 +60,7 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
                                           cl::init(true), cl::Hidden);
 
 char StackProtector::ID = 0;
+
 INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE,
                       "Insert stack protectors", false, true)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
@@ -92,6 +95,11 @@ void StackProtector::adjustForColoring(const AllocaInst *From,
   }
 }
 
+void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetPassConfig>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
 bool StackProtector::runOnFunction(Function &Fn) {
   F = &Fn;
   M = F->getParent();
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index d1758ecbd79f..856bca19dee8 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -22,6 +21,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index ad0b04373656..489a607eb176 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -1,4 +1,4 @@
-//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,22 +12,25 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TailDuplicator.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "tailduplication"
 
 namespace {
+
 /// Perform tail duplication. Delegates to TailDuplicator
 class TailDuplicatePass : public MachineFunctionPass {
   TailDuplicator Duplicator;
 
 public:
   static char ID;
+
   explicit TailDuplicatePass() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -35,8 +38,9 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 };
 
+} // end anonymous namespace
+
 char TailDuplicatePass::ID = 0;
-}
 
 char &llvm::TailDuplicateID = TailDuplicatePass::ID;
 
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index d40f7af431a9..dc7265dcf6c2 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -1,4 +1,4 @@
-//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===//
+//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,22 +12,36 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "tailduplication"
@@ -41,15 +55,13 @@ STATISTIC(NumTailDupRemoved,
 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
 STATISTIC(NumAddedPHIs, "Number of phis added");
 
-namespace llvm {
-
 // Heuristic for tail duplication.
 static cl::opt<unsigned> TailDuplicateSize(
     "tail-dup-size",
     cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
     cl::Hidden);
 
-cl::opt<unsigned> TailDupIndirectBranchSize(
+static cl::opt<unsigned> TailDupIndirectBranchSize(
     "tail-dup-indirect-size",
     cl::desc("Maximum instructions to consider tail duplicating blocks that "
              "end with indirect branches."), cl::init(20),
@@ -138,7 +150,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(
     bool IsSimple, MachineBasicBlock *MBB,
     MachineBasicBlock *ForcedLayoutPred,
     SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
-    llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+    function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
   // Save the successors list.
   SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
                                                MBB->succ_end());
@@ -971,7 +983,7 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
 /// the CFG.
 void TailDuplicator::removeDeadBlock(
     MachineBasicBlock *MBB,
-    llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
+    function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
   assert(MBB->pred_empty() && "MBB must be dead!");
   DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
 
@@ -985,5 +997,3 @@ void TailDuplicator::removeDeadBlock(
   // Remove the block.
   MBB->eraseFromParent();
 }
-
-} // End llvm namespace
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index e5def6752e07..9dd98b4020d2 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,4 +1,4 @@
-//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
+//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,19 +14,21 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include <cstdlib>
+
 using namespace llvm;
 
-TargetFrameLowering::~TargetFrameLowering() {
-}
+TargetFrameLowering::~TargetFrameLowering() = default;
 
 /// The default implementation just looks at attribute "no-frame-pointer-elim".
 bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index c43a5e18ad23..581cfaf60755 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
@@ -34,6 +33,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -1637,8 +1637,10 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
         VT = MinVT;
     }
 
-    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
-    MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+    unsigned NumParts =
+        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+    MVT PartVT =
+        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);
 
     // 'inreg' on function refers to return value
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 1d232c71d824..a0c68e1dcce8 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -12,14 +12,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/Comdat.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -48,11 +52,7 @@
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
@@ -61,10 +61,53 @@
 using namespace llvm;
 using namespace dwarf;
 
+static void GetObjCImageInfo(ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+                             unsigned &Version, unsigned &Flags,
+                             StringRef &Section) {
+  for (const auto &MFE: ModuleFlags) {
+    // Ignore flags with 'Require' behavior.
+    if (MFE.Behavior == Module::Require)
+      continue;
+
+    StringRef Key = MFE.Key->getString();
+    if (Key == "Objective-C Image Info Version") {
+      Version = mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+    } else if (Key == "Objective-C Garbage Collection" ||
+               Key == "Objective-C GC Only" ||
+               Key == "Objective-C Is Simulated" ||
+               Key == "Objective-C Class Properties" ||
+               Key == "Objective-C Image Swift Version") {
+      Flags |= mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+    } else if (Key == "Objective-C Image Info Section") {
+      Section = cast<MDString>(MFE.Val)->getString();
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //                                  ELF
 //===----------------------------------------------------------------------===//
 
+void TargetLoweringObjectFileELF::emitModuleFlags(
+    MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+    const TargetMachine &TM) const {
+  unsigned Version = 0;
+  unsigned Flags = 0;
+  StringRef Section;
+
+  GetObjCImageInfo(ModuleFlags, Version, Flags, Section);
+  if (Section.empty())
+    return;
+
+  auto &C = getContext();
+  auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+  Streamer.SwitchSection(S);
+  Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+  Streamer.EmitIntValue(Version, 4);
+  Streamer.EmitIntValue(Flags, 4);
+  Streamer.AddBlankLine();
+}
+
 MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
     const GlobalValue *GV, const TargetMachine &TM,
     MachineModuleInfo *MMI) const {
@@ -248,6 +291,25 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
     const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
   StringRef SectionName = GO->getSection();
 
+  // Check if '#pragma clang section' name is applicable.
+  // Note that the pragma overrides -ffunction-sections and -fdata-sections,
+  // so the section name is exactly as the user specified and not uniqued.
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
+  if (GV && GV->hasImplicitSection()) {
+    auto Attrs = GV->getAttributes();
+    if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
+      SectionName = Attrs.getAttribute("bss-section").getValueAsString();
+    } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
+      SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+    } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
+      SectionName = Attrs.getAttribute("data-section").getValueAsString();
+    }
+  }
+  const Function *F = dyn_cast<Function>(GO);
+  if (F && F->hasFnAttribute("implicit-section-name")) {
+    SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
+  }
+
   // Infer section flags from the section name if we can.
   Kind = getELFKindForNamedSection(SectionName, Kind);
 
@@ -560,32 +622,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
 void TargetLoweringObjectFileMachO::emitModuleFlags(
     MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
     const TargetMachine &TM) const {
-  unsigned VersionVal = 0;
-  unsigned ImageInfoFlags = 0;
   MDNode *LinkerOptions = nullptr;
-  StringRef SectionVal;
 
   for (const auto &MFE : ModuleFlags) {
-    // Ignore flags with 'Require' behavior.
-    if (MFE.Behavior == Module::Require)
-      continue;
-
     StringRef Key = MFE.Key->getString();
-    Metadata *Val = MFE.Val;
-
-    if (Key == "Objective-C Image Info Version") {
-      VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue();
-    } else if (Key == "Objective-C Garbage Collection" ||
-               Key == "Objective-C GC Only" ||
-               Key == "Objective-C Is Simulated" ||
-               Key == "Objective-C Class Properties" ||
-               Key == "Objective-C Image Swift Version") {
-      ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue();
-    } else if (Key == "Objective-C Image Info Section") {
-      SectionVal = cast<MDString>(Val)->getString();
-    } else if (Key == "Linker Options") {
-      LinkerOptions = cast<MDNode>(Val);
-    }
+    if (Key == "Linker Options")
+      LinkerOptions = cast<MDNode>(MFE.Val);
   }
 
   // Emit the linker options if present.
@@ -598,8 +640,14 @@ void TargetLoweringObjectFileMachO::emitModuleFlags(
     }
   }
 
+  unsigned VersionVal = 0;
+  unsigned ImageInfoFlags = 0;
+  StringRef SectionVal;
+  GetObjCImageInfo(ModuleFlags, VersionVal, ImageInfoFlags, SectionVal);
+
   // The section is mandatory. If we don't have it, then we don't have GC info.
-  if (SectionVal.empty()) return;
+  if (SectionVal.empty())
+    return;
 
   StringRef Segment, Section;
   unsigned TAA = 0, StubSize = 0;
@@ -1137,6 +1185,24 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags(
       }
     }
   }
+
+  unsigned Version = 0;
+  unsigned Flags = 0;
+  StringRef Section;
+
+  GetObjCImageInfo(ModuleFlags, Version, Flags, Section);
+  if (Section.empty())
+    return;
+
+  auto &C = getContext();
+  auto *S = C.getCOFFSection(
+      Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+      SectionKind::getReadOnly());
+  Streamer.SwitchSection(S);
+  Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+  Streamer.EmitIntValue(Version, 4);
+  Streamer.EmitIntValue(Flags, 4);
+  Streamer.AddBlankLine();
 }
 
 void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index c20d5ab814f8..ed845e1706f8 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 72d5e995ac22..b1918b19e1df 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
@@ -95,6 +96,16 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
 static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
     cl::Hidden,
     cl::desc("Enable machine outliner"));
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+  cl::desc("Enable the \"fast\" instruction selector"));
+
+static cl::opt<cl::boolOrDefault>
+    EnableGlobalISel("global-isel", cl::Hidden,
+                     cl::desc("Enable the \"global\" instruction selector"));
 
 static cl::opt<std::string>
 PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
@@ -571,6 +582,66 @@ void TargetPassConfig::addISelPrepare() {
     addPass(createVerifierPass());
 }
 
+bool TargetPassConfig::addCoreISelPasses() {
+  // Enable FastISel with -fast, but allow that to be overridden.
+  TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
+  if (EnableFastISelOption == cl::BOU_TRUE ||
+      (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel()))
+    TM->setFastISel(true);
+
+  // Ask the target for an isel.
+  // Enable GlobalISel if the target wants to, but allow that to be overridden.
+  if (EnableGlobalISel == cl::BOU_TRUE ||
+      (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) {
+    if (addIRTranslator())
+      return true;
+
+    addPreLegalizeMachineIR();
+
+    if (addLegalizeMachineIR())
+      return true;
+
+    // Before running the register bank selector, ask the target if it
+    // wants to run some passes.
+    addPreRegBankSelect();
+
+    if (addRegBankSelect())
+      return true;
+
+    addPreGlobalInstructionSelect();
+
+    if (addGlobalInstructionSelect())
+      return true;
+
+    // Pass to reset the MachineFunction if the ISel failed.
+    addPass(createResetMachineFunctionPass(
+        reportDiagnosticWhenGlobalISelFallback(), isGlobalISelAbortEnabled()));
+
+    // Provide a fallback path when we do not want to abort on
+    // not-yet-supported input.
+    if (!isGlobalISelAbortEnabled() && addInstSelector())
+      return true;
+
+  } else if (addInstSelector())
+    return true;
+
+  return false;
+}
+
+bool TargetPassConfig::addISelPasses() {
+  if (TM->Options.EmulatedTLS)
+    addPass(createLowerEmuTLSPass());
+
+  addPass(createPreISelIntrinsicLoweringPass());
+  addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+  addIRPasses();
+  addCodeGenPrepare();
+  addPassesToHandleExceptions();
+  addISelPrepare();
+
+  return addCoreISelPasses();
+}
+
 /// -regalloc=... command line option.
 static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
 static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 41ec082a24cf..c8537ad2f313 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -21,7 +22,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 
 #define DEBUG_TYPE "target-reg-info"
 
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 0df34ce43112..9210ea8a83f6 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -12,10 +12,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/MC/MCSchedule.h"
@@ -337,8 +337,8 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
 }
 
 static Optional<double>
-getRTroughputFromItineraries(unsigned schedClass,
-                             const InstrItineraryData *IID){
+getRThroughputFromItineraries(unsigned schedClass,
+                              const InstrItineraryData *IID){
   double Unknown = std::numeric_limits<double>::infinity();
   double Throughput = Unknown;
 
@@ -356,9 +356,9 @@ getRTroughputFromItineraries(unsigned schedClass,
 }
 
 static Optional<double>
-getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
-                                 const TargetSubtargetInfo *STI,
-                                 const MCSchedModel &SchedModel) {
+getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
+                                  const TargetSubtargetInfo *STI,
+                                  const MCSchedModel &SchedModel) {
   double Unknown = std::numeric_limits<double>::infinity();
   double Throughput = Unknown;
 
@@ -380,11 +380,11 @@ getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
 Optional<double>
 TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
   if (hasInstrItineraries())
-    return getRTroughputFromItineraries(MI->getDesc().getSchedClass(),
-                                        getInstrItineraries());
+    return getRThroughputFromItineraries(MI->getDesc().getSchedClass(),
+                                         getInstrItineraries());
   if (hasInstrSchedModel())
-    return getRTroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
-                                            SchedModel);
+    return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
+                                             SchedModel);
   return Optional<double>();
 }
 
@@ -392,11 +392,11 @@ Optional<double>
 TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
   unsigned SchedClass = TII->get(Opcode).getSchedClass();
   if (hasInstrItineraries())
-    return getRTroughputFromItineraries(SchedClass, getInstrItineraries());
+    return getRThroughputFromItineraries(SchedClass, getInstrItineraries());
   if (hasInstrSchedModel()) {
     const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
     if (SCDesc->isValid() && !SCDesc->isVariant())
-      return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
+      return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
   }
   return Optional<double>();
 }
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index 0a444e0fff07..82e85bab1474 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 //---------------------------------------------------------------------------
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index d10ca1a7ff91..124c2790f68c 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -72,6 +72,17 @@ void VirtRegMap::grow() {
   Virt2SplitMap.resize(NumRegs);
 }
 
+void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
+         TargetRegisterInfo::isPhysicalRegister(physReg));
+  assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+         "attempt to assign physical register to already mapped "
+         "virtual register");
+  assert(!getRegInfo().isReserved(physReg) &&
+         "Attempt to map virtReg to a reserved physReg");
+  Virt2PhysMap[virtReg] = physReg;
+}
+
 unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
   unsigned Size = TRI->getSpillSize(*RC);
   unsigned Align = TRI->getSpillAlignment(*RC);
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 4e7542bf31e0..c63a0a9e60ea 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -16,13 +16,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCSymbol.h"
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 2df3602733f3..1a8d5a4f45da 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -1,4 +1,4 @@
-//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===//
+//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,20 +14,26 @@
 //
 //===---------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Analysis.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
 namespace {
+
 struct XRayInstrumentation : public MachineFunctionPass {
   static char ID;
 
@@ -66,7 +72,8 @@ private:
   void prependRetWithPatchableExit(MachineFunction &MF,
                                    const TargetInstrInfo *TII);
 };
-} // anonymous namespace
+
+} // end anonymous namespace
 
 void XRayInstrumentation::replaceRetWithPatchableRet(
     MachineFunction &MF, const TargetInstrInfo *TII) {
@@ -134,18 +141,23 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
     if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
       return false; // Invalid value for threshold.
 
+    // Count the number of MachineInstrs in the MachineFunction.
+    int64_t MICount = 0;
+    for (const auto& MBB : MF)
+      MICount += MBB.size();
+
     // Check if we have a loop.
     // FIXME: Maybe make this smarter, and see whether the loops are dependent
     // on inputs or side-effects?
     MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
-    if (MLI.empty() && F.size() < XRayThreshold)
+    if (MLI.empty() && MICount < XRayThreshold)
       return false; // Function is too small and has no loops.
   }
 
   // We look for the first non-empty MachineBasicBlock, so that we can insert
   // the function instrumentation in the appropriate place.
-  auto MBI =
-      find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); });
+  auto MBI = llvm::find_if(
+      MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); });
   if (MBI == MF.end())
     return false; // The function is empty.
 
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index 410b89bc949e..2f9e8981b698 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -3,11 +3,9 @@ add_llvm_library(LLVMDebugInfoCodeView
   CodeViewRecordIO.cpp
   CVSymbolVisitor.cpp
   CVTypeVisitor.cpp
-  EnumTables.cpp
-  Formatters.cpp
-  LazyRandomTypeCollection.cpp
-  Line.cpp
   DebugChecksumsSubsection.cpp
+  DebugCrossExSubsection.cpp
+  DebugCrossImpSubsection.cpp
   DebugFrameDataSubsection.cpp
   DebugInlineeLinesSubsection.cpp
   DebugLinesSubsection.cpp
@@ -15,7 +13,12 @@ add_llvm_library(LLVMDebugInfoCodeView
   DebugSubsection.cpp
   DebugSubsectionRecord.cpp
   DebugSubsectionVisitor.cpp
+  DebugSymbolRVASubsection.cpp
   DebugSymbolsSubsection.cpp
+  EnumTables.cpp
+  Formatters.cpp
+  LazyRandomTypeCollection.cpp
+  Line.cpp
   RecordSerialization.cpp
   SymbolRecordMapping.cpp
   SymbolDumper.cpp
diff --git a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
index 1a85a339f8c3..c31b8d1c96d5 100644
--- a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
@@ -25,8 +25,8 @@ struct FileChecksumEntryHeader {
                               // Checksum bytes follow.
 };
 
-Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract(
-    BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) {
+Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::
+operator()(BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) {
   BinaryStreamReader Reader(Stream);
 
   const FileChecksumEntryHeader *Header;
diff --git a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
new file mode 100644
index 000000000000..21e2cc56075b
--- /dev/null
+++ b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
@@ -0,0 +1,51 @@
+//===- DebugCrossExSubsection.cpp -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
+
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error DebugCrossModuleExportsSubsectionRef::initialize(
+    BinaryStreamReader Reader) {
+  if (Reader.bytesRemaining() % sizeof(CrossModuleExport) != 0)
+    return make_error<CodeViewError>(
+        cv_error_code::corrupt_record,
+        "Cross Scope Exports section is an invalid size!");
+
+  uint32_t Size = Reader.bytesRemaining() / sizeof(CrossModuleExport);
+  return Reader.readArray(References, Size);
+}
+
+Error DebugCrossModuleExportsSubsectionRef::initialize(BinaryStreamRef Stream) {
+  BinaryStreamReader Reader(Stream);
+  return initialize(Reader);
+}
+
+void DebugCrossModuleExportsSubsection::addMapping(uint32_t Local,
+                                                   uint32_t Global) {
+  Mappings[Local] = Global;
+}
+
+uint32_t DebugCrossModuleExportsSubsection::calculateSerializedSize() const {
+  return Mappings.size() * sizeof(CrossModuleExport);
+}
+
+Error DebugCrossModuleExportsSubsection::commit(
+    BinaryStreamWriter &Writer) const {
+  for (const auto &M : Mappings) {
+    if (auto EC = Writer.writeInteger(M.first))
+      return EC;
+    if (auto EC = Writer.writeInteger(M.second))
+      return EC;
+  }
+  return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
new file mode 100644
index 000000000000..2c4a0b779342
--- /dev/null
+++ b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -0,0 +1,91 @@
+//===- DebugCrossImpSubsection.cpp ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+namespace llvm {
+Error VarStreamArrayExtractor<CrossModuleImportItem>::
+operator()(BinaryStreamRef Stream, uint32_t &Len,
+           codeview::CrossModuleImportItem &Item) {
+  BinaryStreamReader Reader(Stream);
+  if (Reader.bytesRemaining() < sizeof(CrossModuleImport))
+    return make_error<CodeViewError>(
+        cv_error_code::insufficient_buffer,
+        "Not enough bytes for a Cross Module Import Header!");
+  if (auto EC = Reader.readObject(Item.Header))
+    return EC;
+  if (Reader.bytesRemaining() < Item.Header->Count * sizeof(uint32_t))
+    return make_error<CodeViewError>(
+        cv_error_code::insufficient_buffer,
+        "Not enough to read specified number of Cross Module References!");
+  if (auto EC = Reader.readArray(Item.Imports, Item.Header->Count))
+    return EC;
+  return Error::success();
+}
+}
+
+Error DebugCrossModuleImportsSubsectionRef::initialize(
+    BinaryStreamReader Reader) {
+  return Reader.readArray(References, Reader.bytesRemaining());
+}
+
+Error DebugCrossModuleImportsSubsectionRef::initialize(BinaryStreamRef Stream) {
+  BinaryStreamReader Reader(Stream);
+  return initialize(Reader);
+}
+
+void DebugCrossModuleImportsSubsection::addImport(StringRef Module,
+                                                  uint32_t ImportId) {
+  Strings.insert(Module);
+  std::vector<support::ulittle32_t> Targets = {support::ulittle32_t(ImportId)};
+  auto Result = Mappings.insert(std::make_pair(Module, Targets));
+  if (!Result.second)
+    Result.first->getValue().push_back(Targets[0]);
+}
+
+uint32_t DebugCrossModuleImportsSubsection::calculateSerializedSize() const {
+  uint32_t Size = 0;
+  for (const auto &Item : Mappings) {
+    Size += sizeof(CrossModuleImport);
+    Size += sizeof(support::ulittle32_t) * Item.second.size();
+  }
+  return Size;
+}
+
+Error DebugCrossModuleImportsSubsection::commit(
+    BinaryStreamWriter &Writer) const {
+  using T = decltype(&*Mappings.begin());
+  std::vector<T> Ids;
+  Ids.reserve(Mappings.size());
+
+  for (const auto &M : Mappings)
+    Ids.push_back(&M);
+
+  std::sort(Ids.begin(), Ids.end(), [this](const T &L1, const T &L2) {
+    return Strings.getStringId(L1->getKey()) <
+           Strings.getStringId(L2->getKey());
+  });
+
+  for (const auto &Item : Ids) {
+    CrossModuleImport Imp;
+    Imp.ModuleNameOffset = Strings.getStringId(Item->getKey());
+    Imp.Count = Item->getValue().size();
+    if (auto EC = Writer.writeObject(Imp))
+      return EC;
+    if (auto EC = Writer.writeArray(makeArrayRef(Item->getValue())))
+      return EC;
+  }
+  return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
index 520a0ee4454f..e7719d05dbdc 100644
--- a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
@@ -17,9 +17,8 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
-Error VarStreamArrayExtractor<InlineeSourceLine>::extract(
-    BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item,
-    bool HasExtraFiles) {
+Error VarStreamArrayExtractor<InlineeSourceLine>::
+operator()(BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item) {
   BinaryStreamReader Reader(Stream);
 
   if (auto EC = Reader.readObject(Item.Header))
@@ -44,8 +43,8 @@ Error DebugInlineeLinesSubsectionRef::initialize(BinaryStreamReader Reader) {
   if (auto EC = Reader.readEnum(Signature))
     return EC;
 
-  if (auto EC =
-          Reader.readArray(Lines, Reader.bytesRemaining(), hasExtraFiles()))
+  Lines.getExtractor().HasExtraFiles = hasExtraFiles();
+  if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining()))
     return EC;
 
   assert(Reader.bytesRemaining() == 0);
diff --git a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
index 2fce06ca2a17..fbcad61d60a6 100644
--- a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
@@ -17,9 +17,8 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
-Error LineColumnExtractor::extract(BinaryStreamRef Stream, uint32_t &Len,
-                                   LineColumnEntry &Item,
-                                   const LineFragmentHeader *Header) {
+Error LineColumnExtractor::operator()(BinaryStreamRef Stream, uint32_t &Len,
+                                      LineColumnEntry &Item) {
   using namespace codeview;
   const LineBlockFragmentHeader *BlockHeader;
   BinaryStreamReader Reader(Stream);
@@ -56,8 +55,8 @@ Error DebugLinesSubsectionRef::initialize(BinaryStreamReader Reader) {
   if (auto EC = Reader.readObject(Header))
     return EC;
 
-  if (auto EC =
-          Reader.readArray(LinesAndColumns, Reader.bytesRemaining(), Header))
+  LinesAndColumns.getExtractor().Header = Header;
+  if (auto EC = Reader.readArray(LinesAndColumns, Reader.bytesRemaining()))
     return EC;
 
   return Error::success();
@@ -145,7 +144,7 @@ uint32_t DebugLinesSubsection::calculateSerializedSize() const {
 }
 
 void DebugLinesSubsection::setRelocationAddress(uint16_t Segment,
-                                                uint16_t Offset) {
+                                                uint32_t Offset) {
   RelocOffset = Offset;
   RelocSegment = Segment;
 }
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index 2e72242181b0..6e647c4b976b 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -23,6 +23,9 @@ Error DebugStringTableSubsectionRef::initialize(BinaryStreamRef Contents) {
   Stream = Contents;
   return Error::success();
 }
+Error DebugStringTableSubsectionRef::initialize(BinaryStreamReader &Reader) {
+  return Reader.readStreamRef(Stream);
+}
 
 Expected<StringRef>
 DebugStringTableSubsectionRef::getString(uint32_t Offset) const {
@@ -52,20 +55,19 @@ uint32_t DebugStringTableSubsection::calculateSerializedSize() const {
 }
 
 Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const {
-  assert(Writer.bytesRemaining() == StringSize);
-  uint32_t MaxOffset = 1;
+  uint32_t Begin = Writer.getOffset();
+  uint32_t End = Begin + StringSize;
 
   for (auto &Pair : Strings) {
     StringRef S = Pair.getKey();
-    uint32_t Offset = Pair.getValue();
+    uint32_t Offset = Begin + Pair.getValue();
     Writer.setOffset(Offset);
     if (auto EC = Writer.writeCString(S))
       return EC;
-    MaxOffset = std::max<uint32_t>(MaxOffset, Offset + S.size() + 1);
+    assert(Writer.getOffset() <= End);
   }
 
-  Writer.setOffset(MaxOffset);
-  assert(Writer.bytesRemaining() == 0);
+  Writer.setOffset(End);
   return Error::success();
 }
 
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index cfd1c5d3ab0c..e9124e68fe82 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -34,14 +34,6 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream,
 
   DebugSubsectionKind Kind =
       static_cast<DebugSubsectionKind>(uint32_t(Header->Kind));
-  switch (Kind) {
-  case DebugSubsectionKind::FileChecksums:
-  case DebugSubsectionKind::Lines:
-  case DebugSubsectionKind::InlineeLines:
-    break;
-  default:
-    llvm_unreachable("Unexpected debug fragment kind!");
-  }
   if (auto EC = Reader.readStreamRef(Info.Data, Header->Length))
     return EC;
   Info.Container = Container;
@@ -50,9 +42,7 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream,
 }
 
 uint32_t DebugSubsectionRecord::getRecordLength() const {
-  uint32_t Result = sizeof(DebugSubsectionHeader) + Data.getLength();
-  assert(Result % alignOf(Container) == 0);
-  return Result;
+  return sizeof(DebugSubsectionHeader) + Data.getLength();
 }
 
 DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; }
@@ -64,25 +54,29 @@ DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder(
     : Subsection(std::move(Subsection)), Container(Container) {}
 
 uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() {
-  uint32_t Size =
-      sizeof(DebugSubsectionHeader) +
-      alignTo(Subsection->calculateSerializedSize(), alignOf(Container));
+  // The length of the entire subsection is always padded to 4 bytes, regardless
+  // of the container kind.
+  uint32_t Size = sizeof(DebugSubsectionHeader) +
+                  alignTo(Subsection->calculateSerializedSize(), 4);
   return Size;
 }
 
-Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) {
+Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const {
   assert(Writer.getOffset() % alignOf(Container) == 0 &&
          "Debug Subsection not properly aligned");
 
   DebugSubsectionHeader Header;
   Header.Kind = uint32_t(Subsection->kind());
-  Header.Length = calculateSerializedLength() - sizeof(DebugSubsectionHeader);
+  // The value written into the Header's Length field is only padded to the
+  // container's alignment.
+  Header.Length =
+      alignTo(Subsection->calculateSerializedSize(), alignOf(Container));
 
   if (auto EC = Writer.writeObject(Header))
     return EC;
   if (auto EC = Subsection->commit(Writer))
     return EC;
-  if (auto EC = Writer.padToAlignment(alignOf(Container)))
+  if (auto EC = Writer.padToAlignment(4))
     return EC;
 
   return Error::success();
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
index f2c4dea8685f..8550107741ce 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
@@ -1,4 +1,4 @@
-//===- DebugSubsectionVisitor.cpp ---------------------------*- C++ -*-===//
+//===- DebugSubsectionVisitor.cpp -------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,9 +10,15 @@
 #include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
 
 #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
+#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamRef.h"
@@ -20,8 +26,40 @@
 using namespace llvm;
 using namespace llvm::codeview;
 
+DebugSubsectionState::DebugSubsectionState() {}
+
+DebugSubsectionState::DebugSubsectionState(
+    const DebugStringTableSubsectionRef &Strings)
+    : Strings(&Strings) {}
+
+DebugSubsectionState::DebugSubsectionState(
+    const DebugStringTableSubsectionRef &Strings,
+    const DebugChecksumsSubsectionRef &Checksums)
+    : Strings(&Strings), Checksums(&Checksums) {}
+
+void DebugSubsectionState::initializeStrings(const DebugSubsectionRecord &SR) {
+  assert(SR.kind() == DebugSubsectionKind::StringTable);
+  assert(!Strings && "Found a string table even though we already have one!");
+
+  OwnedStrings = llvm::make_unique<DebugStringTableSubsectionRef>();
+  consumeError(OwnedStrings->initialize(SR.getRecordData()));
+  Strings = OwnedStrings.get();
+}
+
+void DebugSubsectionState::initializeChecksums(
+    const DebugSubsectionRecord &FCR) {
+  assert(FCR.kind() == DebugSubsectionKind::FileChecksums);
+  if (Checksums)
+    return;
+
+  OwnedChecksums = llvm::make_unique<DebugChecksumsSubsectionRef>();
+  consumeError(OwnedChecksums->initialize(FCR.getRecordData()));
+  Checksums = OwnedChecksums.get();
+}
+
 Error llvm::codeview::visitDebugSubsection(const DebugSubsectionRecord &R,
-                                           DebugSubsectionVisitor &V) {
+                                           DebugSubsectionVisitor &V,
+                                           const DebugSubsectionState &State) {
   BinaryStreamReader Reader(R.getRecordData());
   switch (R.kind()) {
   case DebugSubsectionKind::Lines: {
@@ -29,20 +67,56 @@ Error llvm::codeview::visitDebugSubsection(const DebugSubsectionRecord &R,
     if (auto EC = Fragment.initialize(Reader))
       return EC;
 
-    return V.visitLines(Fragment);
+    return V.visitLines(Fragment, State);
   }
   case DebugSubsectionKind::FileChecksums: {
     DebugChecksumsSubsectionRef Fragment;
     if (auto EC = Fragment.initialize(Reader))
       return EC;
 
-    return V.visitFileChecksums(Fragment);
+    return V.visitFileChecksums(Fragment, State);
   }
   case DebugSubsectionKind::InlineeLines: {
     DebugInlineeLinesSubsectionRef Fragment;
     if (auto EC = Fragment.initialize(Reader))
       return EC;
-    return V.visitInlineeLines(Fragment);
+    return V.visitInlineeLines(Fragment, State);
+  }
+  case DebugSubsectionKind::CrossScopeExports: {
+    DebugCrossModuleExportsSubsectionRef Section;
+    if (auto EC = Section.initialize(Reader))
+      return EC;
+    return V.visitCrossModuleExports(Section, State);
+  }
+  case DebugSubsectionKind::CrossScopeImports: {
+    DebugCrossModuleImportsSubsectionRef Section;
+    if (auto EC = Section.initialize(Reader))
+      return EC;
+    return V.visitCrossModuleImports(Section, State);
+  }
+  case DebugSubsectionKind::Symbols: {
+    DebugSymbolsSubsectionRef Section;
+    if (auto EC = Section.initialize(Reader))
+      return EC;
+    return V.visitSymbols(Section, State);
+  }
+  case DebugSubsectionKind::StringTable: {
+    DebugStringTableSubsectionRef Section;
+    if (auto EC = Section.initialize(Reader))
+      return EC;
+    return V.visitStringTable(Section, State);
+  }
+  case DebugSubsectionKind::FrameData: {
+    DebugFrameDataSubsectionRef Section;
+    if (auto EC = Section.initialize(Reader))
+      return EC;
+    return V.visitFrameData(Section, State);
+  }
+  case DebugSubsectionKind::CoffSymbolRVA: {
+    DebugSymbolRVASubsectionRef Section;
+    if (auto EC = Section.initialize(Reader))
+      return EC;
+    return V.visitCOFFSymbolRVAs(Section, State);
   }
   default: {
     DebugUnknownSubsectionRef Fragment(R.kind(), R.getRecordData());
diff --git a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
new file mode 100644
index 000000000000..5f91b68f3ad8
--- /dev/null
+++ b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
@@ -0,0 +1,31 @@
+//===- DebugSymbolRVASubsection.cpp -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+DebugSymbolRVASubsectionRef::DebugSymbolRVASubsectionRef()
+    : DebugSubsectionRef(DebugSubsectionKind::CoffSymbolRVA) {}
+
+Error DebugSymbolRVASubsectionRef::initialize(BinaryStreamReader &Reader) {
+  return Reader.readArray(RVAs, Reader.bytesRemaining() / sizeof(uint32_t));
+}
+
+DebugSymbolRVASubsection::DebugSymbolRVASubsection()
+    : DebugSubsection(DebugSubsectionKind::CoffSymbolRVA) {}
+
+Error DebugSymbolRVASubsection::commit(BinaryStreamWriter &Writer) const {
+  return Writer.writeArray(makeArrayRef(RVAs));
+}
+
+uint32_t DebugSymbolRVASubsection::calculateSerializedSize() const {
+  return RVAs.size() * sizeof(uint32_t);
+}
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index 699694fde928..8d974d522f28 100644
--- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -51,7 +51,8 @@ void TypeTableCollection::ensureTypeExists(TypeIndex Index) {
 
   CVType Type;
   uint32_t Len;
-  error(VarStreamArrayExtractor<CVType>::extract(Bytes, Len, Type));
+  VarStreamArrayExtractor<CVType> Extract;
+  error(Extract(Bytes, Len, Type));
 
   TypeDatabaseVisitor DBV(Database);
   error(codeview::visitTypeRecord(Type, Index, DBV));
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index e7b4b777b43f..57eac91f8c19 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -7,13 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstddef>
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index a12f8adfafe5..97b52f0fbdd6 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -7,12 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 6e550f2e9ec9..358e9bf43d00 100644
--- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -15,7 +15,7 @@
 
 using namespace llvm;
 
-void DWARFCompileUnit::dump(raw_ostream &OS) {
+void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {
   OS << format("0x%08x", getOffset()) << ": Compile Unit:"
      << " length = " << format("0x%08x", getLength())
      << " version = " << format("0x%04x", getVersion());
@@ -27,7 +27,7 @@ void DWARFCompileUnit::dump(raw_ostream &OS) {
      << ")\n";
 
   if (DWARFDie CUDie = getUnitDIE(false))
-    CUDie.dump(OS, -1U);
+    CUDie.dump(OS, -1U, 0, DumpOpts);
   else
     OS << "<compile unit can't be parsed!>\n\n";
 }
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 1be156d6ea9b..42ab48808f9a 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -84,6 +84,123 @@ static void dumpAccelSection(raw_ostream &OS, StringRef Name,
   Accel.dump(OS);
 }
 
+static void
+dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
+                                const DWARFSection &StringOffsetsSection,
+                                StringRef StringSection, bool LittleEndian) {
+  DataExtractor StrOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
+  uint32_t Offset = 0;
+  uint64_t SectionSize = StringOffsetsSection.Data.size();
+
+  while (Offset < SectionSize) {
+    unsigned Version = 0;
+    DwarfFormat Format = DWARF32;
+    unsigned EntrySize = 4;
+    // Perform validation and extract the contribution size from the header.
+    if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) {
+      OS << "error: invalid contribution to string offsets table in section ."
+         << SectionName << ".\n";
+      return;
+    }
+    uint32_t ContributionStart = Offset;
+    uint64_t ContributionSize = StrOffsetExt.getU32(&Offset);
+    // A contribution size of 0xffffffff indicates DWARF64, with the actual size
+    // in the following 8 bytes. Otherwise, the DWARF standard mandates that
+    // the contribution size must be at most 0xfffffff0.
+    if (ContributionSize == 0xffffffff) {
+      if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) {
+        OS << "error: invalid contribution to string offsets table in section ."
+           << SectionName << ".\n";
+        return;
+      }
+      Format = DWARF64;
+      EntrySize = 8;
+      ContributionSize = StrOffsetExt.getU64(&Offset);
+    } else if (ContributionSize > 0xfffffff0) {
+      OS << "error: invalid contribution to string offsets table in section ."
+         << SectionName << ".\n";
+      return;
+    }
+
+    // We must ensure that we don't read a partial record at the end, so we
+    // validate for a multiple of EntrySize. Also, we're expecting a version
+    // number and padding, which adds an additional 4 bytes.
+    uint64_t ValidationSize =
+        4 + ((ContributionSize + EntrySize - 1) & (-(uint64_t)EntrySize));
+    if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, ValidationSize)) {
+      OS << "error: contribution to string offsets table in section ."
+         << SectionName << " has invalid length.\n";
+      return;
+    }
+
+    Version = StrOffsetExt.getU16(&Offset);
+    Offset += 2;
+    OS << format("0x%8.8x: ", ContributionStart);
+    OS << "Contribution size = " << ContributionSize
+       << ", Version = " << Version << "\n";
+
+    uint32_t ContributionBase = Offset;
+    DataExtractor StrData(StringSection, LittleEndian, 0);
+    while (Offset - ContributionBase < ContributionSize) {
+      OS << format("0x%8.8x: ", Offset);
+      // FIXME: We can only extract strings in DWARF32 format at the moment.
+      uint64_t StringOffset = getRelocatedValue(
+          StrOffsetExt, EntrySize, &Offset, &StringOffsetsSection.Relocs);
+      if (Format == DWARF32) {
+        // Narrow before printing; %x takes an unsigned int, not a uint64_t.
+        uint32_t StringOffset32 = (uint32_t)StringOffset;
+        OS << format("%8.8x ", StringOffset32);
+        if (const char *S = StrData.getCStr(&StringOffset32))
+          OS << format("\"%s\"", S);
+      } else
+        OS << format("%16.16llx ", (unsigned long long)StringOffset);
+      OS << "\n";
+    }
+  }
+}
+
+// Dump a DWARF string offsets section. This may be a DWARF v5 formatted
+// string offsets section, where each compile or type unit contributes a
+// number of entries (string offsets), with each contribution preceded by
+// a header containing size and version number. Alternatively, it may be a
+// monolithic series of string offsets, as generated by the pre-DWARF v5
+// implementation of split DWARF.
+static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
+                                     const DWARFSection &StringOffsetsSection,
+                                     StringRef StringSection, bool LittleEndian,
+                                     unsigned MaxVersion) {
+  if (StringOffsetsSection.Data.empty())
+    return;
+  OS << "\n." << SectionName << " contents:\n";
+  // If we have at least one (compile or type) unit with DWARF v5 or greater,
+  // we assume that the section is formatted like a DWARF v5 string offsets
+  // section.
+  if (MaxVersion >= 5)
+    dumpDWARFv5StringOffsetsSection(OS, SectionName, StringOffsetsSection,
+                                    StringSection, LittleEndian);
+  else {
+    DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
+    uint32_t offset = 0;
+    uint64_t size = StringOffsetsSection.Data.size();
+    // Ensure that size is a multiple of the size of an entry.
+    if (size & ((uint64_t)(sizeof(uint32_t) - 1))) {
+      OS << "error: size of ." << SectionName << " is not a multiple of "
+         << sizeof(uint32_t) << ".\n";
+      size &= -(uint64_t)sizeof(uint32_t);
+    }
+    DataExtractor StrData(StringSection, LittleEndian, 0);
+    while (offset < size) {
+      OS << format("0x%8.8x: ", offset);
+      uint32_t StringOffset = strOffsetExt.getU32(&offset);
+      OS << format("%8.8x  ", StringOffset);
+      const char *S = StrData.getCStr(&StringOffset);
+      if (S)
+        OS << format("\"%s\"", S);
+      OS << "\n";
+    }
+  }
+}
+
 void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){
 
   DIDumpType DumpType = DumpOpts.DumpType;
@@ -104,14 +221,14 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){
   if (DumpType == DIDT_All || DumpType == DIDT_Info) {
     OS << "\n.debug_info contents:\n";
     for (const auto &CU : compile_units())
-      CU->dump(OS);
+      CU->dump(OS, DumpOpts);
   }
 
   if ((DumpType == DIDT_All || DumpType == DIDT_InfoDwo) &&
       getNumDWOCompileUnits()) {
     OS << "\n.debug_info.dwo contents:\n";
     for (const auto &DWOCU : dwo_compile_units())
-      DWOCU->dump(OS);
+      DWOCU->dump(OS, DumpOpts);
   }
 
   if ((DumpType == DIDT_All || DumpType == DIDT_Types) && getNumTypeUnits()) {
@@ -258,17 +375,15 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){
                        true /* GnuStyle */)
         .dump("debug_gnu_pubtypes", OS);
 
-  if ((DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) &&
-      !getStringOffsetDWOSection().empty()) {
-    OS << "\n.debug_str_offsets.dwo contents:\n";
-    DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(),
-                               0);
-    offset = 0;
-    uint64_t size = getStringOffsetDWOSection().size();
-    while (offset < size) {
-      OS << format("0x%8.8x: ", offset);
-      OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
-    }
+  if (DumpType == DIDT_All || DumpType == DIDT_StrOffsets)
+    dumpStringOffsetsSection(OS, "debug_str_offsets", getStringOffsetSection(),
+                             getStringSection(), isLittleEndian(),
+                             getMaxVersion());
+
+  if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) {
+    dumpStringOffsetsSection(OS, "debug_str_offsets.dwo",
+                             getStringOffsetDWOSection(), getStringDWOSection(),
+                             isLittleEndian(), getMaxVersion());
   }
 
   if ((DumpType == DIDT_All || DumpType == DIDT_GdbIndex) &&
@@ -1109,6 +1224,10 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
       TypesDWOSections[Section].Data = data;
     }
 
+    // Map platform specific debug section names to DWARF standard section
+    // names.
+    name = Obj.mapDebugSectionName(name);
+
     if (RelocatedSection == Obj.section_end())
       continue;
 
@@ -1141,6 +1260,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
             .Case("debug_loc", &LocSection.Relocs)
             .Case("debug_info.dwo", &InfoDWOSection.Relocs)
             .Case("debug_line", &LineSection.Relocs)
+            .Case("debug_str_offsets", &StringOffsetSection.Relocs)
             .Case("debug_ranges", &RangeSection.Relocs)
             .Case("debug_addr", &AddrSection.Relocs)
             .Case("apple_names", &AppleNamesSection.Relocs)
@@ -1211,6 +1331,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) {
       .Case("debug_frame", &DebugFrameSection)
       .Case("eh_frame", &EHFrameSection)
       .Case("debug_str", &StringSection)
+      .Case("debug_str_offsets", &StringOffsetSection.Data)
       .Case("debug_ranges", &RangeSection.Data)
       .Case("debug_macinfo", &MacinfoSection)
       .Case("debug_pubnames", &PubNamesSection)
@@ -1222,7 +1343,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) {
       .Case("debug_loc.dwo", &LocDWOSection.Data)
       .Case("debug_line.dwo", &LineDWOSection.Data)
       .Case("debug_str.dwo", &StringDWOSection)
-      .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+      .Case("debug_str_offsets.dwo", &StringOffsetDWOSection.Data)
       .Case("debug_addr", &AddrSection.Data)
       .Case("apple_names", &AppleNamesSection.Data)
       .Case("apple_types", &AppleTypesSection.Data)
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index b55ed6a46849..e6e007896cc8 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -7,18 +7,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
+
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
@@ -584,6 +585,7 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
           switch (AugmentationString[i]) {
             default:
               ReportError("Unknown augmentation character in entry at %lx");
+              llvm_unreachable("ReportError should not return.");
             case 'L':
               LSDAPointerEncoding = Data.getU8(&Offset);
               break;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 35f673c7acc6..dbcc64fc0832 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Support/DataExtractor.h"
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index f32e8fe76357..cda3e75fbc3e 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -9,10 +9,10 @@
 
 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index d5c34216ed53..2178bef65d1d 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index e0a9adde8e58..1b77be6192dd 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SyntaxHighlighting.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
-#include "llvm/Support/Dwarf.h"
+#include "SyntaxHighlighting.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 
diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index daded255f8c7..5a4e39f3c2af 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index 6b5e1d3c931b..43201293fe60 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cinttypes>
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index fd45c77d3745..b4b682dd11b5 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -7,18 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
 #include "SyntaxHighlighting.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
-#include "llvm/DebugInfo/DWARF/DWARFDie.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -67,7 +67,8 @@ static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges,
 
 static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
                           uint32_t *OffsetPtr, dwarf::Attribute Attr,
-                          dwarf::Form Form, unsigned Indent) {
+                          dwarf::Form Form, unsigned Indent,
+                          DIDumpOptions DumpOpts) {
   if (!Die.isValid())
     return;
   const char BaseIndent[] = "            ";
@@ -78,13 +79,15 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
     WithColor(OS, syntax::Attribute) << attrString;
   else
     WithColor(OS, syntax::Attribute).get() << format("DW_AT_Unknown_%x", Attr);
-  
-  auto formString = FormEncodingString(Form);
-  if (!formString.empty())
-    OS << " [" << formString << ']';
-  else
-    OS << format(" [DW_FORM_Unknown_%x]", Form);
-  
+
+  if (!DumpOpts.Brief) {
+    auto formString = FormEncodingString(Form);
+    if (!formString.empty())
+      OS << " [" << formString << ']';
+    else
+      OS << format(" [DW_FORM_Unknown_%x]", Form);
+  }
+
   DWARFUnit *U = Die.getDwarfUnit();
   DWARFFormValue formValue(Form);
   
@@ -301,8 +304,8 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
   CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0);
 }
 
-void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
-                    unsigned Indent) const {
+void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, unsigned Indent,
+                    DIDumpOptions DumpOpts) const {
   if (!isValid())
     return;
   DataExtractor debug_info_data = U->getDebugInfoExtractor();
@@ -322,10 +325,12 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
         else
           WithColor(OS, syntax::Tag).get().indent(Indent)
           << format("DW_TAG_Unknown_%x", getTag());
-        
-        OS << format(" [%u] %c\n", abbrCode,
-                     AbbrevDecl->hasChildren() ? '*' : ' ');
-        
+
+        if (!DumpOpts.Brief)
+          OS << format(" [%u] %c", abbrCode,
+                       AbbrevDecl->hasChildren() ? '*' : ' ');
+        OS << '\n';
+
         // Dump all data in the DIE for the attributes.
         for (const auto &AttrSpec : AbbrevDecl->attributes()) {
           if (AttrSpec.Form == DW_FORM_implicit_const) {
@@ -335,13 +340,13 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
             continue;
           }
           dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form,
-                        Indent);
+                        Indent, DumpOpts);
         }
         
         DWARFDie child = getFirstChild();
         if (RecurseDepth > 0 && child) {
           while (child) {
-            child.dump(OS, RecurseDepth-1, Indent+2);
+            child.dump(OS, RecurseDepth-1, Indent+2, DumpOpts);
             child = child.getSibling();
           }
         }
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 0963d7bfd713..ed1f5f46dcfb 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -13,10 +13,10 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
@@ -301,6 +301,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
     return (FC == FC_Address);
   case DW_FORM_GNU_str_index:
   case DW_FORM_GNU_strp_alt:
+  case DW_FORM_strx:
     return (FC == FC_String);
   case DW_FORM_implicit_const:
     return (FC == FC_Constant);
@@ -415,6 +416,7 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data,
       break;
     case DW_FORM_GNU_addr_index:
     case DW_FORM_GNU_str_index:
+    case DW_FORM_strx:
       Value.uval = Data.getULEB128(OffsetPtr);
       break;
     default:
@@ -542,6 +544,7 @@ void DWARFFormValue::dump(raw_ostream &OS) const {
     OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)UValue);
     dumpString(OS);
     break;
+  case DW_FORM_strx:
   case DW_FORM_GNU_str_index:
     OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue);
     dumpString(OS);
@@ -620,10 +623,11 @@ Optional<const char *> DWARFFormValue::getAsCString() const {
   if (Form == DW_FORM_GNU_strp_alt || U == nullptr)
     return None;
   uint32_t Offset = Value.uval;
-  if (Form == DW_FORM_GNU_str_index) {
-    uint32_t StrOffset;
+  if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx) {
+    uint64_t StrOffset;
     if (!U->getStringOffsetSectionItem(Offset, StrOffset))
       return None;
+    StrOffset += U->getStringOffsetSectionRelocation(Offset);
     Offset = StrOffset;
   }
   if (const char *Str = U->getStringExtractor().getCStr(&Offset)) {
diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index 0625d01097c9..ebd6104ab878 100644
--- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index 25824f6eb83b..fd1684d33a16 100644
--- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
-#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index c5add6a478b3..09e6a292e5fe 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -7,8 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -16,7 +17,6 @@
 #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/DataExtractor.h"
@@ -33,8 +33,9 @@ using namespace dwarf;
 
 void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
   parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(),
-            C.getStringSection(), StringRef(), &C.getAddrSection(),
-            C.getLineSection().Data, C.isLittleEndian(), false);
+            C.getStringSection(), C.getStringOffsetSection(),
+            &C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(),
+            false);
 }
 
 void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
@@ -48,19 +49,14 @@ void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
 
 DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
                      const DWARFDebugAbbrev *DA, const DWARFSection *RS,
-                     StringRef SS, StringRef SOS, const DWARFSection *AOS,
-                     StringRef LS, bool LE, bool IsDWO,
+                     StringRef SS, const DWARFSection &SOS,
+                     const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO,
                      const DWARFUnitSectionBase &UnitSection,
                      const DWARFUnitIndex::Entry *IndexEntry)
     : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
-      LineSection(LS), StringSection(SS), StringOffsetSection([&]() {
-        if (IndexEntry)
-          if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
-            return SOS.slice(C->Offset, C->Offset + C->Length);
-        return SOS;
-      }()),
-      AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO),
-      UnitSection(UnitSection), IndexEntry(IndexEntry) {
+      LineSection(LS), StringSection(SS), StringOffsetSection(SOS),
+      StringOffsetSectionBase(0), AddrOffsetSection(AOS), isLittleEndian(LE),
+      isDWO(IsDWO), UnitSection(UnitSection), IndexEntry(IndexEntry) {
   clear();
 }
 
@@ -77,17 +73,25 @@ bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
 }
 
 bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
-                                                  uint32_t &Result) const {
-  // FIXME: string offset section entries are 8-byte for DWARF64.
-  const uint32_t ItemSize = 4;
-  uint32_t Offset = Index * ItemSize;
-  if (StringOffsetSection.size() < Offset + ItemSize)
+                                           uint64_t &Result) const {
+  unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4;
+  uint32_t Offset = StringOffsetSectionBase + Index * ItemSize;
+  if (StringOffsetSection.Data.size() < Offset + ItemSize)
     return false;
-  DataExtractor DA(StringOffsetSection, isLittleEndian, 0);
-  Result = DA.getU32(&Offset);
+  DataExtractor DA(StringOffsetSection.Data, isLittleEndian, 0);
+  Result = ItemSize == 4 ? DA.getU32(&Offset) : DA.getU64(&Offset);
   return true;
 }
 
+uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const {
+  unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4;
+  uint64_t ByteOffset = StringOffsetSectionBase + Index * ItemSize;
+  RelocAddrMap::const_iterator AI = getStringOffsetsRelocMap().find(ByteOffset);
+  if (AI != getStringOffsetsRelocMap().end())
+    return AI->second.Value;
+  return 0;
+}
+
 bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
   Length = debug_info.getU32(offset_ptr);
   Version = debug_info.getU16(offset_ptr);
@@ -119,6 +123,9 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
   if (!LengthOK || !VersionOK || !AddrSizeOK)
     return false;
 
+  // Keep track of the highest DWARF version we encounter across all units.
+  Context.setMaxVersionIfGreater(Version);
+
   Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset);
   return Abbrevs != nullptr;
 }
@@ -242,6 +249,17 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
       setBaseAddress(*BaseAddr);
     AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
     RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
+
+    // In general, we derive the offset of the unit's contribution to the
+    // debug_str_offsets{.dwo} section from the unit DIE's
+    // DW_AT_str_offsets_base attribute. In dwp files we add to it the offset
+    // we get from the index table.
+    StringOffsetSectionBase =
+        toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0);
+    if (IndexEntry)
+      if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
+        StringOffsetSectionBase += C->Offset;
+
     // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
     // skeleton CU DIE, so that DWARF users not aware of it are not broken.
   }
diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 0981a4dfdfa5..59b3d0ca55a6 100644
--- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/DebugInfo/DWARF/LLVMBuild.txt b/lib/DebugInfo/DWARF/LLVMBuild.txt
index 9f8b1047ef6b..8242a7f2e7f7 100644
--- a/lib/DebugInfo/DWARF/LLVMBuild.txt
+++ b/lib/DebugInfo/DWARF/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = DebugInfoDWARF
 parent = DebugInfo
-required_libraries = Object Support
+required_libraries = BinaryFormat Object Support
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
index cae817c1b367..f62c4991fe33 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h"
 #include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
index 4741d9c9a849..796ce214b383 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
 #include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
index ccf8c4e622cc..b9311d060128 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
 #include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
index 3c211b569044..266638530c2f 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
 #include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h"
 #include "llvm/DebugInfo/PDB/DIA/DIASession.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 using namespace llvm;
 using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index 4e2474c51cb1..0b48a366bd24 100644
--- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -372,8 +372,11 @@ DIARawSymbol::findChildren(PDB_SymType Type) const {
   enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type);
 
   CComPtr<IDiaEnumSymbols> DiaEnumerator;
-  if (S_OK != Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator))
-    return nullptr;
+  if (S_OK !=
+      Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) {
+    if (S_OK != Symbol->findChildren(EnumVal, nullptr, nsNone, &DiaEnumerator))
+      return nullptr;
+  }
 
   return llvm::make_unique<DIAEnumSymbols>(Session, DiaEnumerator);
 }
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index 22c2ef31bd71..396dffaa68b1 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -10,6 +10,7 @@
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/MSF/MSFCommon.h"
@@ -19,7 +20,6 @@
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Support/BinaryItemStream.h"
 #include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/COFF.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -38,12 +38,12 @@ template <> struct BinaryItemTraits<CVSymbol> {
 
 static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize,
                                             uint32_t C13Size) {
-  uint32_t Size = sizeof(uint32_t); // Signature
-  Size += SymbolByteSize;           // Symbol Data
-  Size += 0;                        // TODO: Layout.C11Bytes
-  Size += C13Size;                  // C13 Debug Info Size
-  Size += sizeof(uint32_t);         // GlobalRefs substream size (always 0)
-  Size += 0;                        // GlobalRefs substream bytes
+  uint32_t Size = sizeof(uint32_t);   // Signature
+  Size += alignTo(SymbolByteSize, 4); // Symbol Data
+  Size += 0;                          // TODO: Layout.C11Bytes
+  Size += C13Size;                    // C13 Debug Info Size
+  Size += sizeof(uint32_t);           // GlobalRefs substream size (always 0)
+  Size += 0;                          // GlobalRefs substream bytes
   return Size;
 }
 
@@ -156,6 +156,8 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
     BinaryStreamRef RecordsRef(Records);
     if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
       return EC;
+    if (auto EC = SymbolWriter.padToAlignment(4))
+      return EC;
     // TODO: Write C11 Line data
     assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
            "Invalid debug section alignment!");
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 320b11dc5cab..24322d942fac 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -216,10 +216,12 @@ FixedStreamArray<SecMapEntry> DbiStream::getSectionMap() const {
 
 void DbiStream::visitSectionContributions(
     ISectionContribVisitor &Visitor) const {
-  if (SectionContribVersion == DbiSecContribVer60) {
+  if (!SectionContribs.empty()) {
+    assert(SectionContribVersion == DbiSecContribVer60);
     for (auto &SC : SectionContribs)
       Visitor.visit(SC);
-  } else if (SectionContribVersion == DbiSecContribV2) {
+  } else if (!SectionContribs2.empty()) {
+    assert(SectionContribVersion == DbiSecContribV2);
     for (auto &SC : SectionContribs2)
       Visitor.visit(SC);
   }
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 55c20fdb9af6..355c7b57f4d1 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -10,6 +10,7 @@
 #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
 #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"
@@ -17,7 +18,6 @@
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/BinaryStreamWriter.h"
-#include "llvm/Support/COFF.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index c4ff30011a17..4186f2eb6ba0 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -90,14 +90,14 @@ Error ModuleDebugStreamRef::commit() { return Error::success(); }
 
 Expected<codeview::DebugChecksumsSubsectionRef>
 ModuleDebugStreamRef::findChecksumsSubsection() const {
+  codeview::DebugChecksumsSubsectionRef Result;
   for (const auto &SS : subsections()) {
     if (SS.kind() != DebugSubsectionKind::FileChecksums)
       continue;
 
-    codeview::DebugChecksumsSubsectionRef Result;
     if (auto EC = Result.initialize(SS.getRecordData()))
       return std::move(EC);
     return Result;
   }
-  return make_error<RawError>(raw_error_code::no_entry);
+  return Result;
 }
diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index 58202577672a..091ac67035dc 100644
--- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -105,10 +105,12 @@ Error PublicsStream::reload() {
                                            "Could not read a thunk map."));
 
   // Something called "section map" follows.
-  if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Could not read a section map."));
+  if (Reader.bytesRemaining() > 0) {
+    if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections))
+      return joinErrors(std::move(EC),
+                        make_error<RawError>(raw_error_code::corrupt_file,
+                                             "Could not read a section map."));
+  }
 
   if (Reader.bytesRemaining() > 0)
     return make_error<RawError>(raw_error_code::corrupt_file,
diff --git a/lib/DebugInfo/PDB/PDBContext.cpp b/lib/DebugInfo/PDB/PDBContext.cpp
index f6b6b951ebe1..df0feac2bc40 100644
--- a/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/lib/DebugInfo/PDB/PDBContext.cpp
@@ -12,8 +12,8 @@
 #include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
 #include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
 #include "llvm/Object/COFF.h"
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index 7385d3ba1489..7076b4aec347 100644
--- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolBlock.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index e08450e0ad0c..f73cd36d057a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index 2f1c43666ae5..df696fa8c5f2 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -10,8 +10,8 @@
 #include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h"
 
 #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index 9ec20bb62d75..a7b69a755941 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -10,8 +10,8 @@
 #include "llvm/DebugInfo/PDB/PDBSymbolCustom.h"
 
 #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 0734a1f8314a..5a5cb4c1b5ca 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -12,10 +12,10 @@
 #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
-#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
 #include "llvm/DebugInfo/PDB/PDBTypes.h"
 
 #include <unordered_set>
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index 482c95e3a850..4a4195beb4ea 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index ae23c7619e2a..a448a404dc4a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index 87bb4044216b..dbec16fcbaac 100644
--- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index 0ee18d471624..0fdf8b6d0f77 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index f617d8d0c2df..726e7e1cdbb4 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 68ba87c1cdf8..6c84b984d210 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -10,8 +10,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index ec27985e91d1..c01877287888 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index 473529d1b043..0304c6286c8f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -12,9 +12,9 @@
 #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h"
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
 #include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h"
-#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index 86e0ec4f8565..7cfba823b4fa 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index a516a4d2c429..ddc0574617c5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
index dbbea9c93e20..fdbe845f455a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index 020aec9e98a8..f40578f4372a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
 
-#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 #include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
 
 #include <utility>
 
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index f672680cb9ea..2a89faff9647 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -15,12 +15,12 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/SymbolSize.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Error.h"
diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp
index 9de3ddc039d6..19711ca58c6f 100644
--- a/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -16,6 +16,7 @@
 #include "SymbolizableObjectFile.h"
 
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Config/config.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/PDB/PDB.h"
@@ -24,7 +25,6 @@
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/MachOUniversal.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
@@ -39,6 +39,8 @@
 
 #if defined(_MSC_VER)
 #include <Windows.h>
+
+// This must be included after windows.h.
 #include <DbgHelp.h>
 #pragma comment(lib, "dbghelp.lib")
 
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 0051c69efb7d..a7b1fe206f10 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -12,10 +12,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"
 #include "IntelJITEventsWrapper.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Config/config.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
index e9668892c05b..f2d36a76a315 100644
--- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
@@ -22,8 +22,8 @@
 #include <windows.h>
 #pragma optimize("", off)
 #else  /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-#include <pthread.h>
 #include <dlfcn.h>
+#include <pthread.h>
 #include <stdint.h>
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 #include <malloc.h>
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index ee75bee9c533..64dca930722e 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -22,7 +22,7 @@
 #include "Interpreter.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/Config/config.h"     // Detect libffi
+#include "llvm/Config/config.h" // Detect libffi
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -33,8 +33,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/UniqueLock.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cmath>
 #include <csignal>
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 57b5d85bb550..3581d6458395 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Config/config.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/OProfileWrapper.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 711b887da6ef..e3a456849f90 100644
--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
 #include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index a27573f93b97..7dd6b17d33cb 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -20,11 +20,11 @@
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
 #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/Object/Archive.h"
@@ -34,10 +34,10 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <algorithm>
 #include <map>
 #include <memory>
 #include <set>
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index ab86e5d6a0fd..2b69f1a0269f 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -12,13 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "RuntimeDyldCheckerImpl.h"
 #include "RuntimeDyldCOFF.h"
+#include "RuntimeDyldCheckerImpl.h"
 #include "RuntimeDyldELF.h"
 #include "RuntimeDyldImpl.h"
 #include "RuntimeDyldMachO.h"
-#include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MutexGuard.h"
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 9ce3974529bb..3d12eadea4dd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -18,10 +18,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 18c23c5a2a5d..5268bc5a1868 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -28,8 +28,8 @@
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/SwapByteOrder.h"
 #include <map>
-#include <unordered_map>
 #include <system_error>
+#include <unordered_map>
 
 using namespace llvm;
 using namespace llvm::object;
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
index 6aa1a2bdb926..901f77865ba1 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H
 #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H
 
-#include "llvm/Object/COFF.h"
-#include "llvm/Support/COFF.h"
 #include "../RuntimeDyldCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
 
 #define DEBUG_TYPE "dyld"
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 318afa21a88b..3e4b0c8f75bb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H
 #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H
 
-#include "llvm/Object/COFF.h"
-#include "llvm/Support/COFF.h"
 #include "../RuntimeDyldCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
 
 #define DEBUG_TYPE "dyld"
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index 26e73989d7ed..7cbb43854151 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H
 #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H
 
-#include "llvm/Object/COFF.h"
-#include "llvm/Support/COFF.h"
 #include "../RuntimeDyldCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
 
 #define DEBUG_TYPE "dyld"
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
index cae4d69789a2..926996d6f7b3 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "RuntimeDyldELFMips.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/BinaryFormat/ELF.h"
 
 #define DEBUG_TYPE "dyld"
 
diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp
index 50478eac6827..8904475f084f 100644
--- a/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/Config/config.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Process.h"
 
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index e93c79cfcec6..9aad3771784d 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -10,9 +10,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "FuzzerCorpus.h"
+#include "FuzzerIO.h"
 #include "FuzzerInterface.h"
 #include "FuzzerInternal.h"
-#include "FuzzerIO.h"
 #include "FuzzerMutate.h"
 #include "FuzzerRandom.h"
 #include "FuzzerShmem.h"
@@ -149,7 +149,7 @@ static bool ParseOneFlag(const char *Param) {
         int Val = MyStol(Str);
         *FlagDescriptions[F].IntFlag = Val;
         if (Flags.verbosity >= 2)
-          Printf("Flag: %s %d\n", Name, Val);;
+          Printf("Flag: %s %d\n", Name, Val);
         return true;
       } else if (FlagDescriptions[F].UIntFlag) {
         unsigned int Val = std::stoul(Str);
diff --git a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp
index 77521698c80a..321b3ec5d414 100644
--- a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp
+++ b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp
@@ -14,6 +14,8 @@
 #include "FuzzerExtFunctions.h"
 #include "FuzzerIO.h"
 #include "Windows.h"
+
+// This must be included after Windows.h.
 #include "Psapi.h"
 
 namespace fuzzer {
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 14caa203c5ef..f6083282ab61 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -10,8 +10,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "FuzzerCorpus.h"
-#include "FuzzerInternal.h"
 #include "FuzzerIO.h"
+#include "FuzzerInternal.h"
 #include "FuzzerMutate.h"
 #include "FuzzerRandom.h"
 #include "FuzzerShmem.h"
diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp
index e66460c29e2f..612f4bbb28f2 100644
--- a/lib/Fuzzer/FuzzerMerge.cpp
+++ b/lib/Fuzzer/FuzzerMerge.cpp
@@ -9,9 +9,9 @@
 // Merging corpora.
 //===----------------------------------------------------------------------===//
 
-#include "FuzzerInternal.h"
-#include "FuzzerIO.h"
 #include "FuzzerMerge.h"
+#include "FuzzerIO.h"
+#include "FuzzerInternal.h"
 #include "FuzzerTracePC.h"
 #include "FuzzerUtil.h"
 
diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp
index e60d4130de10..53cb9027e455 100644
--- a/lib/Fuzzer/FuzzerMutate.cpp
+++ b/lib/Fuzzer/FuzzerMutate.cpp
@@ -9,11 +9,11 @@
 // Mutate a test input.
 //===----------------------------------------------------------------------===//
 
+#include "FuzzerMutate.h"
 #include "FuzzerCorpus.h"
 #include "FuzzerDefs.h"
 #include "FuzzerExtFunctions.h"
 #include "FuzzerIO.h"
-#include "FuzzerMutate.h"
 #include "FuzzerOptions.h"
 
 namespace fuzzer {
diff --git a/lib/Fuzzer/FuzzerShmemPosix.cpp b/lib/Fuzzer/FuzzerShmemPosix.cpp
index 2723bdd86f48..50cdcfb509dc 100644
--- a/lib/Fuzzer/FuzzerShmemPosix.cpp
+++ b/lib/Fuzzer/FuzzerShmemPosix.cpp
@@ -14,14 +14,14 @@
 #include "FuzzerIO.h"
 #include "FuzzerShmem.h"
 
-#include <sys/types.h>
-#include <sys/stat.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <sys/mman.h>
 #include <semaphore.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 #include <unistd.h>
 
 namespace fuzzer {
diff --git a/lib/Fuzzer/FuzzerShmemWindows.cpp b/lib/Fuzzer/FuzzerShmemWindows.cpp
index 6325b4b8e5b4..d330ebf4fd07 100644
--- a/lib/Fuzzer/FuzzerShmemWindows.cpp
+++ b/lib/Fuzzer/FuzzerShmemWindows.cpp
@@ -14,10 +14,10 @@
 #include "FuzzerIO.h"
 #include "FuzzerShmem.h"
 
-#include <sys/types.h>
-#include <sys/stat.h>
 #include <fcntl.h>
 #include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 
 namespace fuzzer {
 
diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp
index ce0f7a47eee6..ea93468ea0ed 100644
--- a/lib/Fuzzer/FuzzerTracePC.cpp
+++ b/lib/Fuzzer/FuzzerTracePC.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "FuzzerTracePC.h"
 #include "FuzzerCorpus.h"
 #include "FuzzerDefs.h"
 #include "FuzzerDictionary.h"
 #include "FuzzerExtFunctions.h"
 #include "FuzzerIO.h"
-#include "FuzzerTracePC.h"
 #include "FuzzerUtil.h"
 #include "FuzzerValueBitMap.h"
 #include <map>
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index a486223d650c..8670e2ad6727 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -10,8 +10,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "FuzzerDictionary.h"
-#include "FuzzerInternal.h"
 #include "FuzzerIO.h"
+#include "FuzzerInternal.h"
 #include "FuzzerMutate.h"
 #include "FuzzerTracePC.h"
 #include <algorithm>
diff --git a/lib/Fuzzer/FuzzerUtilWindows.cpp b/lib/Fuzzer/FuzzerUtilWindows.cpp
index 08bb3cf3be15..25ac976fc2db 100644
--- a/lib/Fuzzer/FuzzerUtilWindows.cpp
+++ b/lib/Fuzzer/FuzzerUtilWindows.cpp
@@ -22,6 +22,8 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <windows.h>
+
+// This must be included after windows.h.
 #include <Psapi.h>
 
 namespace fuzzer {
diff --git a/lib/Fuzzer/afl/afl_driver.cpp b/lib/Fuzzer/afl/afl_driver.cpp
index 3815ed11cf60..d0521bdfdd67 100644
--- a/lib/Fuzzer/afl/afl_driver.cpp
+++ b/lib/Fuzzer/afl/afl_driver.cpp
@@ -12,8 +12,8 @@
 Usage:
 ################################################################################
 cat << EOF > test_fuzzer.cc
-#include <stdint.h>
 #include <stddef.h>
+#include <stdint.h>
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   if (size > 0 && data[0] == 'H')
     if (size > 1 && data[1] == 'I')
@@ -50,18 +50,18 @@ statistics from the file. If that fails then the process will quit.
 
 */
 #include <assert.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <sys/resource.h>
 #include <sys/time.h>
+#include <unistd.h>
 
-#include <iostream>
 #include <fstream>
+#include <iostream>
 #include <vector>
 
 // Platform detection. Copied from FuzzerInternal.h
diff --git a/lib/Fuzzer/test/AFLDriverTest.cpp b/lib/Fuzzer/test/AFLDriverTest.cpp
index e3f5f7100883..b949adc7de15 100644
--- a/lib/Fuzzer/test/AFLDriverTest.cpp
+++ b/lib/Fuzzer/test/AFLDriverTest.cpp
@@ -3,8 +3,8 @@
 
 // Contains dummy functions used to avoid dependency on AFL.
 #include <stdint.h>
-#include <stdlib.h>
 #include <stdio.h>
+#include <stdlib.h>
 
 extern "C" void __afl_manual_init() {}
 
diff --git a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
index 69b0d59fb8ef..dfb6007b7970 100644
--- a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
+++ b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // abs(x) < 0 and y == Const puzzle, 64-bit variant.
-#include <cstring>
-#include <cstdint>
-#include <cstdlib>
 #include <cstddef>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   if (Size < 16) return 0;
diff --git a/lib/Fuzzer/test/AbsNegAndConstantTest.cpp b/lib/Fuzzer/test/AbsNegAndConstantTest.cpp
index 69075a454c99..e9d983ff1ebf 100644
--- a/lib/Fuzzer/test/AbsNegAndConstantTest.cpp
+++ b/lib/Fuzzer/test/AbsNegAndConstantTest.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // abs(x) < 0 and y == Const puzzle.
-#include <cstring>
-#include <cstdint>
-#include <cstdlib>
 #include <cstddef>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   if (Size < 8) return 0;
diff --git a/lib/Fuzzer/test/AccumulateAllocationsTest.cpp b/lib/Fuzzer/test/AccumulateAllocationsTest.cpp
index 604d8fa299ae..e9acd7ccbd30 100644
--- a/lib/Fuzzer/test/AccumulateAllocationsTest.cpp
+++ b/lib/Fuzzer/test/AccumulateAllocationsTest.cpp
@@ -2,8 +2,8 @@
 // License. See LICENSE.TXT for details.
 
 // Test with a more mallocs than frees, but no leak.
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 
 const int kAllocatedPointersSize = 10000;
 int NumAllocatedPointers = 0;
diff --git a/lib/Fuzzer/test/BadStrcmpTest.cpp b/lib/Fuzzer/test/BadStrcmpTest.cpp
index 159cd7ea5f70..ba2b068f741d 100644
--- a/lib/Fuzzer/test/BadStrcmpTest.cpp
+++ b/lib/Fuzzer/test/BadStrcmpTest.cpp
@@ -2,9 +2,9 @@
 // License. See LICENSE.TXT for details.
 
 // Test that we don't creash in case of bad strcmp params.
+#include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <cstddef>
 
 static volatile int Sink;
 
diff --git a/lib/Fuzzer/test/BufferOverflowOnInput.cpp b/lib/Fuzzer/test/BufferOverflowOnInput.cpp
index b9d14052aee4..75e1fb90a19a 100644
--- a/lib/Fuzzer/test/BufferOverflowOnInput.cpp
+++ b/lib/Fuzzer/test/BufferOverflowOnInput.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a fuzzer. The fuzzer must find the string "Hi!".
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 static volatile bool SeedLargeBuffer;
diff --git a/lib/Fuzzer/test/CallerCalleeTest.cpp b/lib/Fuzzer/test/CallerCalleeTest.cpp
index 3ec025d02301..ed9f37cc1521 100644
--- a/lib/Fuzzer/test/CallerCalleeTest.cpp
+++ b/lib/Fuzzer/test/CallerCalleeTest.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a fuzzer.
 // Try to find the target using the indirect caller-callee pairs.
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <cstring>
 #include <iostream>
 
diff --git a/lib/Fuzzer/test/CleanseTest.cpp b/lib/Fuzzer/test/CleanseTest.cpp
index faea8dcb3c30..ee1845701269 100644
--- a/lib/Fuzzer/test/CleanseTest.cpp
+++ b/lib/Fuzzer/test/CleanseTest.cpp
@@ -3,9 +3,9 @@
 
 // Test the the fuzzer is able to 'cleanse' the reproducer
 // by replacing all irrelevant bytes with garbage.
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   if (Size >= 20 && Data[1] == '1' && Data[5] == '5' && Data[10] == 'A' &&
diff --git a/lib/Fuzzer/test/CustomMutatorTest.cpp b/lib/Fuzzer/test/CustomMutatorTest.cpp
index 4f84519a90e6..521d7f506b4d 100644
--- a/lib/Fuzzer/test/CustomMutatorTest.cpp
+++ b/lib/Fuzzer/test/CustomMutatorTest.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a cutom mutator.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 #include "FuzzerInterface.h"
diff --git a/lib/Fuzzer/test/CxxStringEqTest.cpp b/lib/Fuzzer/test/CxxStringEqTest.cpp
index e0e23c972ccb..924851c5ad53 100644
--- a/lib/Fuzzer/test/CxxStringEqTest.cpp
+++ b/lib/Fuzzer/test/CxxStringEqTest.cpp
@@ -3,11 +3,11 @@
 
 // Simple test for a fuzzer. Must find a specific string
 // used in std::string operator ==.
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
-#include <string>
 #include <iostream>
+#include <string>
 
 static volatile int Sink;
 
diff --git a/lib/Fuzzer/test/DSOTestMain.cpp b/lib/Fuzzer/test/DSOTestMain.cpp
index 3e225d886128..e0c857d4fdec 100644
--- a/lib/Fuzzer/test/DSOTestMain.cpp
+++ b/lib/Fuzzer/test/DSOTestMain.cpp
@@ -4,9 +4,9 @@
 // Source code for a simple DSO.
 
 #include <cstdint>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <cstdio>
 extern int DSO1(int a);
 extern int DSO2(int a);
 extern int DSOTestExtra(int a);
diff --git a/lib/Fuzzer/test/DivTest.cpp b/lib/Fuzzer/test/DivTest.cpp
index 63f6960f4e90..bce13feb790f 100644
--- a/lib/Fuzzer/test/DivTest.cpp
+++ b/lib/Fuzzer/test/DivTest.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a fuzzer: find the interesting argument for div.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <cstddef>
 #include <iostream>
 
 static volatile int Sink;
diff --git a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp
index 62b3be76e3aa..bbf5ea235c7a 100644
--- a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp
+++ b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp
@@ -2,9 +2,9 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the string "FUZZ".
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
diff --git a/lib/Fuzzer/test/FullCoverageSetTest.cpp b/lib/Fuzzer/test/FullCoverageSetTest.cpp
index 415e0b4760c5..6d7e48fe51f8 100644
--- a/lib/Fuzzer/test/FullCoverageSetTest.cpp
+++ b/lib/Fuzzer/test/FullCoverageSetTest.cpp
@@ -2,9 +2,9 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the string "FUZZER".
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp
index 78ea874f2ce2..c8beb4331bfa 100644
--- a/lib/Fuzzer/test/FuzzerUnittest.cpp
+++ b/lib/Fuzzer/test/FuzzerUnittest.cpp
@@ -6,12 +6,12 @@
 #define _LIBCPP_HAS_NO_ASAN
 
 #include "FuzzerCorpus.h"
-#include "FuzzerInternal.h"
 #include "FuzzerDictionary.h"
+#include "FuzzerInternal.h"
 #include "FuzzerMerge.h"
 #include "FuzzerMutate.h"
-#include "FuzzerTracePC.h"
 #include "FuzzerRandom.h"
+#include "FuzzerTracePC.h"
 #include "gtest/gtest.h"
 #include <memory>
 #include <set>
diff --git a/lib/Fuzzer/test/LeakTest.cpp b/lib/Fuzzer/test/LeakTest.cpp
index 22e5164050e5..ea89e3901057 100644
--- a/lib/Fuzzer/test/LeakTest.cpp
+++ b/lib/Fuzzer/test/LeakTest.cpp
@@ -2,8 +2,8 @@
 // License. See LICENSE.TXT for details.
 
 // Test with a leak.
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 
 static volatile void *Sink;
 
diff --git a/lib/Fuzzer/test/LeakTimeoutTest.cpp b/lib/Fuzzer/test/LeakTimeoutTest.cpp
index 4f31b3e52c16..92526194a508 100644
--- a/lib/Fuzzer/test/LeakTimeoutTest.cpp
+++ b/lib/Fuzzer/test/LeakTimeoutTest.cpp
@@ -2,8 +2,8 @@
 // License. See LICENSE.TXT for details.
 
 // Test with a leak.
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 
 static volatile int *Sink;
 
diff --git a/lib/Fuzzer/test/LoadTest.cpp b/lib/Fuzzer/test/LoadTest.cpp
index eef16c7be51e..67a28c7cb22f 100644
--- a/lib/Fuzzer/test/LoadTest.cpp
+++ b/lib/Fuzzer/test/LoadTest.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a fuzzer: find interesting value of array index.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstring>
-#include <cstddef>
 #include <iostream>
 
 static volatile int Sink;
diff --git a/lib/Fuzzer/test/Memcmp64BytesTest.cpp b/lib/Fuzzer/test/Memcmp64BytesTest.cpp
index e81526b578a3..5b6cb707173f 100644
--- a/lib/Fuzzer/test/Memcmp64BytesTest.cpp
+++ b/lib/Fuzzer/test/Memcmp64BytesTest.cpp
@@ -3,10 +3,10 @@
 
 // Simple test for a fuzzer. The fuzzer must find a particular string.
 #include <cassert>
-#include <cstring>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   const char kString64Bytes[] =
diff --git a/lib/Fuzzer/test/MemcmpTest.cpp b/lib/Fuzzer/test/MemcmpTest.cpp
index fdbf94683f76..8dbb7d84fbba 100644
--- a/lib/Fuzzer/test/MemcmpTest.cpp
+++ b/lib/Fuzzer/test/MemcmpTest.cpp
@@ -2,10 +2,10 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find a particular string.
-#include <cstring>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   // TODO: check other sizes.
diff --git a/lib/Fuzzer/test/NotinstrumentedTest.cpp b/lib/Fuzzer/test/NotinstrumentedTest.cpp
index ffe952c749d2..91418990b192 100644
--- a/lib/Fuzzer/test/NotinstrumentedTest.cpp
+++ b/lib/Fuzzer/test/NotinstrumentedTest.cpp
@@ -2,8 +2,8 @@
 // License. See LICENSE.TXT for details.
 
 // This test should not be instrumented.
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   return 0;
diff --git a/lib/Fuzzer/test/NthRunCrashTest.cpp b/lib/Fuzzer/test/NthRunCrashTest.cpp
index b43e69e51b25..da5fbd33e962 100644
--- a/lib/Fuzzer/test/NthRunCrashTest.cpp
+++ b/lib/Fuzzer/test/NthRunCrashTest.cpp
@@ -2,8 +2,8 @@
 // License. See LICENSE.TXT for details.
 
 // Crash on the N-th execution.
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 #include <iostream>
 
 static int Counter;
diff --git a/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp b/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp
index 153710920a5f..459db51f8a3b 100644
--- a/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp
+++ b/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp
@@ -2,9 +2,9 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the empty string.
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 static volatile int *Null = 0;
diff --git a/lib/Fuzzer/test/NullDerefTest.cpp b/lib/Fuzzer/test/NullDerefTest.cpp
index 3f03d2498197..1b44b682ace6 100644
--- a/lib/Fuzzer/test/NullDerefTest.cpp
+++ b/lib/Fuzzer/test/NullDerefTest.cpp
@@ -2,9 +2,9 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the string "Hi!".
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 static volatile int Sink;
diff --git a/lib/Fuzzer/test/OneHugeAllocTest.cpp b/lib/Fuzzer/test/OneHugeAllocTest.cpp
index 8d3d1d6d302d..32a557871000 100644
--- a/lib/Fuzzer/test/OneHugeAllocTest.cpp
+++ b/lib/Fuzzer/test/OneHugeAllocTest.cpp
@@ -3,9 +3,9 @@
 
 // Tests OOM handling when there is a single large allocation.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <cstring>
 #include <iostream>
 
diff --git a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp
index 316b7682b8e6..a07795a08dff 100644
--- a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp
+++ b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp
@@ -3,9 +3,9 @@
 
 // Tests OOM handling.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <cstring>
 #include <iostream>
 
diff --git a/lib/Fuzzer/test/OutOfMemoryTest.cpp b/lib/Fuzzer/test/OutOfMemoryTest.cpp
index 078a39ee1fe9..5e59bde09853 100644
--- a/lib/Fuzzer/test/OutOfMemoryTest.cpp
+++ b/lib/Fuzzer/test/OutOfMemoryTest.cpp
@@ -3,9 +3,9 @@
 
 // Tests OOM handling.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <cstring>
 #include <iostream>
 #include <thread>
diff --git a/lib/Fuzzer/test/RepeatedBytesTest.cpp b/lib/Fuzzer/test/RepeatedBytesTest.cpp
index 2fa6c78c26d8..14222f284747 100644
--- a/lib/Fuzzer/test/RepeatedBytesTest.cpp
+++ b/lib/Fuzzer/test/RepeatedBytesTest.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a fuzzer. The fuzzer must find repeated bytes.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
diff --git a/lib/Fuzzer/test/RepeatedMemcmp.cpp b/lib/Fuzzer/test/RepeatedMemcmp.cpp
index 7377f65ed76d..18369deac3b0 100644
--- a/lib/Fuzzer/test/RepeatedMemcmp.cpp
+++ b/lib/Fuzzer/test/RepeatedMemcmp.cpp
@@ -1,11 +1,10 @@
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 
-
-#include <cstring>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   int Matches1 = 0;
diff --git a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp
index 0fd7c5e9a1fb..d09542963626 100644
--- a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp
+++ b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // Test that we can find the minimal item in the corpus (3 bytes: "FUZ").
-#include <cstdint>
-#include <cstdlib>
 #include <cstddef>
-#include <cstring>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
 static volatile int Sink;
 
diff --git a/lib/Fuzzer/test/ShrinkValueProfileTest.cpp b/lib/Fuzzer/test/ShrinkValueProfileTest.cpp
index 026b8ce26591..86e4e3cb0d9a 100644
--- a/lib/Fuzzer/test/ShrinkValueProfileTest.cpp
+++ b/lib/Fuzzer/test/ShrinkValueProfileTest.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // Test that we can find the minimal item in the corpus (3 bytes: "FUZ").
-#include <cstdint>
-#include <cstdlib>
 #include <cstddef>
-#include <cstring>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
 static volatile uint32_t Sink;
 
diff --git a/lib/Fuzzer/test/SignedIntOverflowTest.cpp b/lib/Fuzzer/test/SignedIntOverflowTest.cpp
index 7df32ad57933..d80060207dee 100644
--- a/lib/Fuzzer/test/SignedIntOverflowTest.cpp
+++ b/lib/Fuzzer/test/SignedIntOverflowTest.cpp
@@ -3,11 +3,11 @@
 
 // Test for signed-integer-overflow.
 #include <assert.h>
+#include <climits>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
-#include <climits>
 
 static volatile int Sink;
 static int Large = INT_MAX;
diff --git a/lib/Fuzzer/test/SimpleCmpTest.cpp b/lib/Fuzzer/test/SimpleCmpTest.cpp
index 12b5cdda0660..8acad4ac77e8 100644
--- a/lib/Fuzzer/test/SimpleCmpTest.cpp
+++ b/lib/Fuzzer/test/SimpleCmpTest.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a fuzzer. The fuzzer must find several narrow ranges.
 #include <cstdint>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <cstdio>
 
 extern int AllLines[];
 
diff --git a/lib/Fuzzer/test/SimpleDictionaryTest.cpp b/lib/Fuzzer/test/SimpleDictionaryTest.cpp
index cd7292bd006c..a1cd20047224 100644
--- a/lib/Fuzzer/test/SimpleDictionaryTest.cpp
+++ b/lib/Fuzzer/test/SimpleDictionaryTest.cpp
@@ -5,9 +5,9 @@
 // The fuzzer must find a string based on dictionary words:
 //   "Elvis"
 //   "Presley"
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <cstring>
 #include <iostream>
 
diff --git a/lib/Fuzzer/test/SimpleHashTest.cpp b/lib/Fuzzer/test/SimpleHashTest.cpp
index 00599de78ebe..99e96cb25dcd 100644
--- a/lib/Fuzzer/test/SimpleHashTest.cpp
+++ b/lib/Fuzzer/test/SimpleHashTest.cpp
@@ -5,9 +5,9 @@
 // and then compares the last 4 bytes with the computed value.
 // A fuzzer with cmp traces is expected to defeat this check.
 #include <cstdint>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <cstdio>
 
 // A modified jenkins_one_at_a_time_hash initialized by non-zero,
 // so that simple_hash(0) != 0. See also
diff --git a/lib/Fuzzer/test/SimpleTest.cpp b/lib/Fuzzer/test/SimpleTest.cpp
index e53ea160ed8f..a8b4988dff10 100644
--- a/lib/Fuzzer/test/SimpleTest.cpp
+++ b/lib/Fuzzer/test/SimpleTest.cpp
@@ -3,9 +3,9 @@
 
 // Simple test for a fuzzer. The fuzzer must find the string "Hi!".
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 static volatile int Sink;
diff --git a/lib/Fuzzer/test/SimpleThreadedTest.cpp b/lib/Fuzzer/test/SimpleThreadedTest.cpp
index 5f02d3f8457c..1abdc3fc6d6b 100644
--- a/lib/Fuzzer/test/SimpleThreadedTest.cpp
+++ b/lib/Fuzzer/test/SimpleThreadedTest.cpp
@@ -3,8 +3,8 @@
 
 // Threaded test for a fuzzer. The fuzzer should find "H"
 #include <assert.h>
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 #include <cstring>
 #include <iostream>
 #include <thread>
diff --git a/lib/Fuzzer/test/SingleByteInputTest.cpp b/lib/Fuzzer/test/SingleByteInputTest.cpp
index 4ce819d230ce..72b58ba912eb 100644
--- a/lib/Fuzzer/test/SingleByteInputTest.cpp
+++ b/lib/Fuzzer/test/SingleByteInputTest.cpp
@@ -2,10 +2,10 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer, need just one byte to crash.
-#include <cstdint>
-#include <cstdlib>
 #include <cstddef>
+#include <cstdint>
 #include <cstdio>
+#include <cstdlib>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   if (Size > 0 && Data[Size/2] == 42) {
diff --git a/lib/Fuzzer/test/SingleMemcmpTest.cpp b/lib/Fuzzer/test/SingleMemcmpTest.cpp
index c73f68a7ee6e..83c09e0428ec 100644
--- a/lib/Fuzzer/test/SingleMemcmpTest.cpp
+++ b/lib/Fuzzer/test/SingleMemcmpTest.cpp
@@ -2,10 +2,10 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find a particular string.
-#include <cstring>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   char *S = (char*)Data;
diff --git a/lib/Fuzzer/test/SingleStrcmpTest.cpp b/lib/Fuzzer/test/SingleStrcmpTest.cpp
index 48f481dfc51a..149073444c9c 100644
--- a/lib/Fuzzer/test/SingleStrcmpTest.cpp
+++ b/lib/Fuzzer/test/SingleStrcmpTest.cpp
@@ -2,10 +2,10 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find a particular string.
-#include <cstring>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   if (Size >= 7) {
diff --git a/lib/Fuzzer/test/SingleStrncmpTest.cpp b/lib/Fuzzer/test/SingleStrncmpTest.cpp
index e5601da86329..b302670fb743 100644
--- a/lib/Fuzzer/test/SingleStrncmpTest.cpp
+++ b/lib/Fuzzer/test/SingleStrncmpTest.cpp
@@ -2,10 +2,10 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find a particular string.
-#include <cstring>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   char *S = (char*)Data;
diff --git a/lib/Fuzzer/test/SpamyTest.cpp b/lib/Fuzzer/test/SpamyTest.cpp
index d294d4dc53e0..721134e1841c 100644
--- a/lib/Fuzzer/test/SpamyTest.cpp
+++ b/lib/Fuzzer/test/SpamyTest.cpp
@@ -3,9 +3,9 @@
 
 // The test spams to stderr and stdout.
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdio>
-#include <cstddef>
 #include <iostream>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
diff --git a/lib/Fuzzer/test/StrcmpTest.cpp b/lib/Fuzzer/test/StrcmpTest.cpp
index cd91dda76f30..e7636e8812fc 100644
--- a/lib/Fuzzer/test/StrcmpTest.cpp
+++ b/lib/Fuzzer/test/StrcmpTest.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // Break through a series of strcmp.
-#include <cstring>
+#include <cassert>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
-#include <cassert>
+#include <cstring>
 
 bool Eq(const uint8_t *Data, size_t Size, const char *Str) {
   char Buff[1024];
diff --git a/lib/Fuzzer/test/StrncmpOOBTest.cpp b/lib/Fuzzer/test/StrncmpOOBTest.cpp
index f70b003afad6..4ed71d9d021d 100644
--- a/lib/Fuzzer/test/StrncmpOOBTest.cpp
+++ b/lib/Fuzzer/test/StrncmpOOBTest.cpp
@@ -3,10 +3,10 @@
 
 // Test that libFuzzer itself does not read out of bounds.
 #include <assert.h>
-#include <cstdint>
-#include <cstring>
-#include <cstdlib>
 #include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
 #include <iostream>
 
 static volatile int Sink;
diff --git a/lib/Fuzzer/test/StrncmpTest.cpp b/lib/Fuzzer/test/StrncmpTest.cpp
index 5ffd011dcdff..f71f01ee3098 100644
--- a/lib/Fuzzer/test/StrncmpTest.cpp
+++ b/lib/Fuzzer/test/StrncmpTest.cpp
@@ -2,10 +2,10 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find a particular string.
-#include <cstring>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 
 static volatile int sink;
 
diff --git a/lib/Fuzzer/test/StrstrTest.cpp b/lib/Fuzzer/test/StrstrTest.cpp
index f021e75ec0fd..a3ea4e03b3d2 100644
--- a/lib/Fuzzer/test/StrstrTest.cpp
+++ b/lib/Fuzzer/test/StrstrTest.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // Test strstr and strcasestr hooks.
-#include <string>
-#include <string.h>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
+#include <string.h>
+#include <string>
 
 // Windows does not have strcasestr and memmem, so we are not testing them.
 #ifdef _WIN32
diff --git a/lib/Fuzzer/test/SwapCmpTest.cpp b/lib/Fuzzer/test/SwapCmpTest.cpp
index b90ac72c22c4..bbfbefe6ab71 100644
--- a/lib/Fuzzer/test/SwapCmpTest.cpp
+++ b/lib/Fuzzer/test/SwapCmpTest.cpp
@@ -3,9 +3,9 @@
 
 // The fuzzer must find several constants with swapped bytes.
 #include <cstdint>
+#include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <cstdio>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   if (Size < 14) return 0;
diff --git a/lib/Fuzzer/test/Switch2Test.cpp b/lib/Fuzzer/test/Switch2Test.cpp
index 3c6a3004907e..5f66ac8b499e 100644
--- a/lib/Fuzzer/test/Switch2Test.cpp
+++ b/lib/Fuzzer/test/Switch2Test.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the interesting switch value.
-#include <cstdint>
-#include <cstdlib>
-#include <cstdio>
-#include <cstring>
 #include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
 int Switch(int a) {
   switch(a) {
diff --git a/lib/Fuzzer/test/SwitchTest.cpp b/lib/Fuzzer/test/SwitchTest.cpp
index 3dc051ff7b5b..86944cad21c5 100644
--- a/lib/Fuzzer/test/SwitchTest.cpp
+++ b/lib/Fuzzer/test/SwitchTest.cpp
@@ -2,11 +2,11 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the interesting switch value.
-#include <cstdint>
-#include <cstdlib>
-#include <cstdio>
-#include <cstring>
 #include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
 
 static volatile int Sink;
 
diff --git a/lib/Fuzzer/test/TableLookupTest.cpp b/lib/Fuzzer/test/TableLookupTest.cpp
index f9d5610820ff..8126eeabaf42 100644
--- a/lib/Fuzzer/test/TableLookupTest.cpp
+++ b/lib/Fuzzer/test/TableLookupTest.cpp
@@ -3,11 +3,11 @@
 
 // Make sure the fuzzer eventually finds all possible values of a variable
 // within a range.
-#include <cstring>
+#include <cassert>
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
-#include <cassert>
+#include <cstring>
 #include <set>
 
 const size_t N = 1 << 12;
diff --git a/lib/Fuzzer/test/ThreadedLeakTest.cpp b/lib/Fuzzer/test/ThreadedLeakTest.cpp
index 751107110871..538d3b434808 100644
--- a/lib/Fuzzer/test/ThreadedLeakTest.cpp
+++ b/lib/Fuzzer/test/ThreadedLeakTest.cpp
@@ -2,8 +2,8 @@
 // License. See LICENSE.TXT for details.
 
 // The fuzzer should find a leak in a non-main thread.
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 #include <thread>
 
 static volatile int *Sink;
diff --git a/lib/Fuzzer/test/ThreadedTest.cpp b/lib/Fuzzer/test/ThreadedTest.cpp
index 09137a9a70c1..bb51ba764eba 100644
--- a/lib/Fuzzer/test/ThreadedTest.cpp
+++ b/lib/Fuzzer/test/ThreadedTest.cpp
@@ -3,8 +3,8 @@
 
 // Threaded test for a fuzzer. The fuzzer should not crash.
 #include <assert.h>
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 #include <cstring>
 #include <thread>
 
diff --git a/lib/Fuzzer/test/TimeoutEmptyTest.cpp b/lib/Fuzzer/test/TimeoutEmptyTest.cpp
index 8066f480b655..1ddf1fa34589 100644
--- a/lib/Fuzzer/test/TimeoutEmptyTest.cpp
+++ b/lib/Fuzzer/test/TimeoutEmptyTest.cpp
@@ -2,8 +2,8 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the empty string.
-#include <cstdint>
 #include <cstddef>
+#include <cstdint>
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   static volatile int Zero = 0;
diff --git a/lib/Fuzzer/test/TimeoutTest.cpp b/lib/Fuzzer/test/TimeoutTest.cpp
index f8107012c841..e3cdba3eec38 100644
--- a/lib/Fuzzer/test/TimeoutTest.cpp
+++ b/lib/Fuzzer/test/TimeoutTest.cpp
@@ -2,9 +2,9 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. The fuzzer must find the string "Hi!".
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 static volatile int Sink;
diff --git a/lib/Fuzzer/test/TraceMallocTest.cpp b/lib/Fuzzer/test/TraceMallocTest.cpp
index 43e6950e185f..af9975603aa1 100644
--- a/lib/Fuzzer/test/TraceMallocTest.cpp
+++ b/lib/Fuzzer/test/TraceMallocTest.cpp
@@ -3,9 +3,9 @@
 
 // Tests -trace_malloc
 #include <assert.h>
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 int *Ptr;
diff --git a/lib/Fuzzer/test/TwoDifferentBugsTest.cpp b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp
index 42c0d192ba86..77d2cb1a25f9 100644
--- a/lib/Fuzzer/test/TwoDifferentBugsTest.cpp
+++ b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp
@@ -2,9 +2,9 @@
 // License. See LICENSE.TXT for details.
 
 // Simple test for a fuzzer. This test may trigger two different bugs.
+#include <cstddef>
 #include <cstdint>
 #include <cstdlib>
-#include <cstddef>
 #include <iostream>
 
 static volatile int *Null = 0;
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index ec4663018bd4..556e122ff82f 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/AssemblyAnnotationWriter.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CFG.h"
@@ -39,7 +40,6 @@
 #include "llvm/IR/UseListOrder.h"
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index a76c944f0005..a518f7b5c81a 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -13,17 +13,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Attributes.h"
 #include "AttributeImpl.h"
 #include "LLVMContextImpl.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Type.h"
diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp
index e27ecad0a884..c735f9b2eb1e 100644
--- a/lib/IR/Comdat.cpp
+++ b/lib/IR/Comdat.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Comdat.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Comdat.h"
 
 using namespace llvm;
 
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index 509caba3acd4..21d1996ef851 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -21,10 +21,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Operator.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -577,9 +577,6 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   if (isFullSet())
     return ConstantRange(DstTySize, /*isFullSet=*/true);
 
-  APInt MaxValue = APInt::getLowBitsSet(getBitWidth(), DstTySize);
-  APInt MaxBitValue = APInt::getOneBitSet(getBitWidth(), DstTySize);
-
   APInt LowerDiv(Lower), UpperDiv(Upper);
   ConstantRange Union(DstTySize, /*isFullSet=*/false);
 
@@ -587,35 +584,42 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
   // We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and
   // then we do the union with [MaxValue, Upper)
   if (isWrappedSet()) {
-    // If Upper is greater than Max Value, it covers the whole truncated range.
-    if (Upper.uge(MaxValue))
+    // If Upper is greater than or equal to MaxValue(DstTy), it covers the whole
+    // truncated range.
+    if (Upper.getActiveBits() > DstTySize ||
+        Upper.countTrailingOnes() == DstTySize)
       return ConstantRange(DstTySize, /*isFullSet=*/true);
 
     Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize));
     UpperDiv.setAllBits();
 
     // Union covers the MaxValue case, so return if the remaining range is just
-    // MaxValue.
+    // MaxValue(DstTy).
     if (LowerDiv == UpperDiv)
       return Union;
   }
 
   // Chop off the most significant bits that are past the destination bitwidth.
-  if (LowerDiv.uge(MaxValue)) {
-    APInt Div(getBitWidth(), 0);
-    APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv);
-    UpperDiv -= MaxBitValue * Div;
+  if (LowerDiv.getActiveBits() > DstTySize) {
+    // Mask to just the significant bits and subtract from LowerDiv/UpperDiv.
+    APInt Adjust = LowerDiv & APInt::getBitsSetFrom(getBitWidth(), DstTySize);
+    LowerDiv -= Adjust;
+    UpperDiv -= Adjust;
   }
 
-  if (UpperDiv.ule(MaxValue))
+  unsigned UpperDivWidth = UpperDiv.getActiveBits();
+  if (UpperDivWidth <= DstTySize)
     return ConstantRange(LowerDiv.trunc(DstTySize),
                          UpperDiv.trunc(DstTySize)).unionWith(Union);
 
   // The truncated value wraps around. Check if we can do better than fullset.
-  UpperDiv -= MaxBitValue;
-  if (UpperDiv.ult(LowerDiv))
-    return ConstantRange(LowerDiv.trunc(DstTySize),
-                         UpperDiv.trunc(DstTySize)).unionWith(Union);
+  if (UpperDivWidth == DstTySize + 1) {
+    // Clear the MSB so that UpperDiv wraps around.
+    UpperDiv.clearBit(DstTySize);
+    if (UpperDiv.ult(LowerDiv))
+      return ConstantRange(LowerDiv.trunc(DstTySize),
+                           UpperDiv.trunc(DstTySize)).unionWith(Union);
+  }
 
   return ConstantRange(DstTySize, /*isFullSet=*/true);
 }
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 8b0ff66334a7..27150a89d9b2 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -127,7 +127,7 @@ bool Constant::isOneValue() const {
 
   // Check for FP which are bitcasted from 1 integers
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
-    return CFP->getValueAPF().bitcastToAPInt() == 1;
+    return CFP->getValueAPF().bitcastToAPInt().isOneValue();
 
   // Check for constant vectors which are splats of 1 values.
   if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
@@ -1157,21 +1157,14 @@ bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
   unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay
   if (Ty->isIntegerTy(1))
     return Val == 0 || Val == 1;
-  if (NumBits >= 64)
-    return true; // always true, has to fit in largest type
-  uint64_t Max = (1ll << NumBits) - 1;
-  return Val <= Max;
+  return isUIntN(NumBits, Val);
 }
 
 bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) {
   unsigned NumBits = Ty->getIntegerBitWidth();
   if (Ty->isIntegerTy(1))
     return Val == 0 || Val == 1 || Val == -1;
-  if (NumBits >= 64)
-    return true; // always true, has to fit in largest type
-  int64_t Min = -(1ll << (NumBits-1));
-  int64_t Max = (1ll << (NumBits-1)) - 1;
-  return (Val >= Min && Val <= Max);
+  return isIntN(NumBits, Val);
 }
 
 bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 50292b6e20bf..4ff0261a7f08 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -568,6 +568,14 @@ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
 
 /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
 
+void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr) {
+    int i = 0;
+    for (auto *T : unwrap(Tp)->subtypes()) {
+        Arr[i] = wrap(T);
+        i++;
+    }
+}
+
 LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
   return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
 }
@@ -587,6 +595,10 @@ LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) {
   return wrap(cast<SequentialType>(Ty)->getElementType());
 }
 
+unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp) {
+    return unwrap(Tp)->getNumContainedTypes();
+}
+
 unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
   return unwrap<ArrayType>(ArrayTy)->getNumElements();
 }
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 7754ac03b43d..7e598b43ac16 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/DIBuilder.h"
+#include "LLVMContextImpl.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
-#include "LLVMContextImpl.h"
 
 using namespace llvm;
 using namespace llvm::dwarf;
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index d5e29649a237..5de281a95237 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -16,11 +16,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/DataLayout.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalVariable.h"
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index ca3828420a72..56cec57a4d07 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
@@ -20,7 +21,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index 0485fece7c42..6297395b4c00 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "LLVMContextImpl.h"
 #include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/IntrinsicInst.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index e73f53f3202d..5129d6b9b008 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -12,14 +12,14 @@
 // Diagnostics reporting is still done as part of the LLVMContext.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -32,8 +32,8 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
 #include <atomic>
 #include <cassert>
 #include <memory>
diff --git a/lib/IR/DiagnosticPrinter.cpp b/lib/IR/DiagnosticPrinter.cpp
index 659ff49d623f..ee2df9e24f93 100644
--- a/lib/IR/DiagnosticPrinter.cpp
+++ b/lib/IR/DiagnosticPrinter.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/Twine.h"
 #include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index 44948cc5831d..37e735251fdf 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -150,12 +150,6 @@ bool DominatorTree::dominates(const Instruction *Def,
 
 bool DominatorTree::dominates(const BasicBlockEdge &BBE,
                               const BasicBlock *UseBB) const {
-  // Assert that we have a single edge. We could handle them by simply
-  // returning false, but since isSingleEdge is linear on the number of
-  // edges, the callers can normally handle them more efficiently.
-  assert(BBE.isSingleEdge() &&
-         "This function is not efficient in handling multiple edges");
-
   // If the BB the edge ends in doesn't dominate the use BB, then the
   // edge also doesn't.
   const BasicBlock *Start = BBE.getStart();
@@ -188,11 +182,17 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE,
   // trivially dominates itself, so we only have to find if it dominates the
   // other predecessors. Since the only way out of X is via NormalDest, X can
   // only properly dominate a node if NormalDest dominates that node too.
+  int IsDuplicateEdge = 0;
   for (const_pred_iterator PI = pred_begin(End), E = pred_end(End);
        PI != E; ++PI) {
     const BasicBlock *BB = *PI;
-    if (BB == Start)
+    if (BB == Start) {
+      // If there are multiple edges between Start and End, by definition they
+      // can't dominate anything.
+      if (IsDuplicateEdge++)
+        return false;
       continue;
+    }
 
     if (!dominates(End, BB))
       return false;
@@ -201,12 +201,6 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE,
 }
 
 bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const {
-  // Assert that we have a single edge. We could handle them by simply
-  // returning false, but since isSingleEdge is linear on the number of
-  // edges, the callers can normally handle them more efficiently.
-  assert(BBE.isSingleEdge() &&
-         "This function is not efficient in handling multiple edges");
-
   Instruction *UserInst = cast<Instruction>(U.getUser());
   // A PHI in the end of the edge is dominated by it.
   PHINode *PN = dyn_cast<PHINode>(UserInst);
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index fc68c0e3cad9..85a019856c01 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Function.h"
 #include "LLVMContextImpl.h"
 #include "SymbolTableListTraitsImpl.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -29,7 +30,6 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instruction.h"
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index 17d27b016cf2..afd4a36270a8 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -12,10 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "LLVMContextImpl.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
@@ -24,7 +25,6 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "LLVMContextImpl.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -293,6 +293,8 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
                    InitVal != nullptr, Link, Name, AddressSpace),
       isConstantGlobal(constant),
       isExternallyInitializedConstant(isExternallyInitialized) {
+  assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) &&
+         "invalid type for global variable");
   setThreadLocalMode(TLMode);
   if (InitVal) {
     assert(InitVal->getType() == Ty &&
@@ -311,6 +313,8 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
                    InitVal != nullptr, Link, Name, AddressSpace),
       isConstantGlobal(constant),
       isExternallyInitializedConstant(isExternallyInitialized) {
+  assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) &&
+         "invalid type for global variable");
   setThreadLocalMode(TLMode);
   if (InitVal) {
     assert(InitVal->getType() == Ty &&
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index 7572d0c6b3bc..81b02946e1d5 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Statepoint.h"
@@ -134,6 +134,38 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
   return CI;  
 }
 
+CallInst *IRBuilderBase::CreateElementAtomicMemCpy(
+    Value *Dst, Value *Src, Value *NumElements, uint32_t ElementSize,
+    MDNode *TBAATag, MDNode *TBAAStructTag, MDNode *ScopeTag,
+    MDNode *NoAliasTag) {
+  Dst = getCastedInt8PtrValue(Dst);
+  Src = getCastedInt8PtrValue(Src);
+
+  Value *Ops[] = {Dst, Src, NumElements, getInt32(ElementSize)};
+  Type *Tys[] = {Dst->getType(), Src->getType()}; // Passed to getDeclaration.
+  Module *M = BB->getParent()->getParent(); // NOTE(review): BB must be set.
+  Value *TheFn =
+      Intrinsic::getDeclaration(M, Intrinsic::memcpy_element_atomic, Tys);
+
+  CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+  // Set the TBAA info if present.
+  if (TBAATag)
+    CI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
+  // Set the TBAA Struct info if present.
+  if (TBAAStructTag)
+    CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag);
+
+  if (ScopeTag) // Set the alias scope info if present.
+    CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag);
+
+  if (NoAliasTag) // Set the noalias info if present.
+    CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
+
+  return CI; // The call built above, with any supplied metadata attached.
+}
+
 CallInst *IRBuilderBase::
 CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
               bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 6c0c5a267f81..ad22efdf0eff 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/InlineAsm.h"
 #include "ConstantsContext.h"
 #include "LLVMContextImpl.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 828e78b13005..3dd653d2d047 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
 using namespace llvm;
@@ -216,10 +216,10 @@ void Instruction::copyFastMathFlags(const Instruction *I) {
   copyFastMathFlags(I->getFastMathFlags());
 }
 
-void Instruction::copyIRFlags(const Value *V) {
+void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) {
   // Copy the wrapping flags.
-  if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
-    if (isa<OverflowingBinaryOperator>(this)) {
+  if (IncludeWrapFlags && isa<OverflowingBinaryOperator>(this)) {
+    if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
       setHasNoSignedWrap(OB->hasNoSignedWrap());
       setHasNoUnsignedWrap(OB->hasNoUnsignedWrap());
     }
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 46c27331ff95..023a0b178a14 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Instructions.h"
 #include "LLVMContextImpl.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallVector.h"
@@ -26,7 +27,6 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -63,7 +63,7 @@ unsigned TerminatorInst::getNumSuccessors() const {
   switch (getOpcode()) {
 #define HANDLE_TERM_INST(N, OPC, CLASS)                                        \
   case Instruction::OPC:                                                       \
-    return static_cast<const CLASS *>(this)->getNumSuccessorsV();
+    return static_cast<const CLASS *>(this)->getNumSuccessors();
 #include "llvm/IR/Instruction.def"
   default:
     break;
@@ -75,7 +75,7 @@ BasicBlock *TerminatorInst::getSuccessor(unsigned idx) const {
   switch (getOpcode()) {
 #define HANDLE_TERM_INST(N, OPC, CLASS)                                        \
   case Instruction::OPC:                                                       \
-    return static_cast<const CLASS *>(this)->getSuccessorV(idx);
+    return static_cast<const CLASS *>(this)->getSuccessor(idx);
 #include "llvm/IR/Instruction.def"
   default:
     break;
@@ -87,7 +87,7 @@ void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) {
   switch (getOpcode()) {
 #define HANDLE_TERM_INST(N, OPC, CLASS)                                        \
   case Instruction::OPC:                                                       \
-    return static_cast<CLASS *>(this)->setSuccessorV(idx, B);
+    return static_cast<CLASS *>(this)->setSuccessor(idx, B);
 #include "llvm/IR/Instruction.def"
   default:
     break;
@@ -747,18 +747,6 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB,
   return NewII;
 }
 
-BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const {
-  return getSuccessor(idx);
-}
-
-unsigned InvokeInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
-  return setSuccessor(idx, B);
-}
-
 Value *InvokeInst::getReturnedArgOperand() const {
   unsigned Index;
 
@@ -902,20 +890,6 @@ ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
                    OperandTraits<ReturnInst>::op_end(this), 0, InsertAtEnd) {
 }
 
-unsigned ReturnInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to
-/// emit the vtable for the class in this translation unit.
-void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
-  llvm_unreachable("ReturnInst has no successors!");
-}
-
-BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
-  llvm_unreachable("ReturnInst has no successors!");
-}
-
 //===----------------------------------------------------------------------===//
 //                        ResumeInst Implementation
 //===----------------------------------------------------------------------===//
@@ -938,18 +912,6 @@ ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd)
   Op<0>() = Exn;
 }
 
-unsigned ResumeInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
-  llvm_unreachable("ResumeInst has no successors!");
-}
-
-BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
-  llvm_unreachable("ResumeInst has no successors!");
-}
-
 //===----------------------------------------------------------------------===//
 //                        CleanupReturnInst Implementation
 //===----------------------------------------------------------------------===//
@@ -992,20 +954,6 @@ CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB,
   init(CleanupPad, UnwindBB);
 }
 
-BasicBlock *CleanupReturnInst::getSuccessorV(unsigned Idx) const {
-  assert(Idx == 0);
-  return getUnwindDest();
-}
-
-unsigned CleanupReturnInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void CleanupReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) {
-  assert(Idx == 0);
-  setUnwindDest(B);
-}
-
 //===----------------------------------------------------------------------===//
 //                        CatchReturnInst Implementation
 //===----------------------------------------------------------------------===//
@@ -1037,20 +985,6 @@ CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB,
   init(CatchPad, BB);
 }
 
-BasicBlock *CatchReturnInst::getSuccessorV(unsigned Idx) const {
-  assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
-  return getSuccessor();
-}
-
-unsigned CatchReturnInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void CatchReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) {
-  assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
-  setSuccessor(B);
-}
-
 //===----------------------------------------------------------------------===//
 //                       CatchSwitchInst Implementation
 //===----------------------------------------------------------------------===//
@@ -1134,18 +1068,6 @@ void CatchSwitchInst::removeHandler(handler_iterator HI) {
   setNumHungOffUseOperands(getNumOperands() - 1);
 }
 
-BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const {
-  return getSuccessor(idx);
-}
-
-unsigned CatchSwitchInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void CatchSwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
-  setSuccessor(idx, B);
-}
-
 //===----------------------------------------------------------------------===//
 //                        FuncletPadInst Implementation
 //===----------------------------------------------------------------------===//
@@ -1198,18 +1120,6 @@ UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
                    nullptr, 0, InsertAtEnd) {
 }
 
-unsigned UnreachableInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
-  llvm_unreachable("UnreachableInst has no successors!");
-}
-
-BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
-  llvm_unreachable("UnreachableInst has no successors!");
-}
-
 //===----------------------------------------------------------------------===//
 //                        BranchInst Implementation
 //===----------------------------------------------------------------------===//
@@ -1285,18 +1195,6 @@ void BranchInst::swapSuccessors() {
   swapProfMetadata();
 }
 
-BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
-  return getSuccessor(idx);
-}
-
-unsigned BranchInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
-  setSuccessor(idx, B);
-}
-
 //===----------------------------------------------------------------------===//
 //                        AllocaInst Implementation
 //===----------------------------------------------------------------------===//
@@ -3785,19 +3683,6 @@ void SwitchInst::growOperands() {
   growHungoffUses(ReservedSpace);
 }
 
-
-BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const {
-  return getSuccessor(idx);
-}
-
-unsigned SwitchInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
-  setSuccessor(idx, B);
-}
-
 //===----------------------------------------------------------------------===//
 //                        IndirectBrInst Implementation
 //===----------------------------------------------------------------------===//
@@ -3877,18 +3762,6 @@ void IndirectBrInst::removeDestination(unsigned idx) {
   setNumHungOffUseOperands(NumOps-1);
 }
 
-BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const {
-  return getSuccessor(idx);
-}
-
-unsigned IndirectBrInst::getNumSuccessorsV() const {
-  return getNumSuccessors();
-}
-
-void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) {
-  setSuccessor(idx, B);
-}
-
 //===----------------------------------------------------------------------===//
 //                           cloneImpl() implementations
 //===----------------------------------------------------------------------===//
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
index 94e115a6a78d..8b12c55937f5 100644
--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@@ -21,8 +21,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Metadata.h"
diff --git a/lib/IR/LLVMBuild.txt b/lib/IR/LLVMBuild.txt
index cd90ef5b16b6..71368abfd874 100644
--- a/lib/IR/LLVMBuild.txt
+++ b/lib/IR/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = Core
 parent = Libraries
-required_libraries = Support
+required_libraries = BinaryFormat Support
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index 6c6383c22255..ad0d4470c111 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/LLVMContext.h"
+#include "LLVMContextImpl.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
-#include "LLVMContextImpl.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Metadata.h"
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 9db30da89ed0..4ba974409a4f 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -27,13 +27,13 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <vector>
 
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index b2b12289f871..29e2f42d3e05 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -593,7 +593,7 @@ AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
     assert(Node && "cached analysis usage must be non null");
 
     AnUsageMap[P] = &Node->AU;
-    AnUsage = &Node->AU;;
+    AnUsage = &Node->AU;
   }
   return AnUsage;
 }
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 2411dc5ce7dc..0b1bc9a8c270 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Metadata.h"
 #include "LLVMContextImpl.h"
 #include "MetadataImpl.h"
 #include "SymbolTableListTraitsImpl.h"
@@ -19,11 +20,11 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Argument.h"
@@ -38,7 +39,6 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/TrackingMDRef.h"
 #include "llvm/IR/Type.h"
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 95673e515a55..f8853ed169c5 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Module.h"
 #include "SymbolTableListTraitsImpl.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
@@ -22,17 +23,16 @@
 #include "llvm/IR/Comdat.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GVMaterializer.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalIFunc.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/GVMaterializer.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/SymbolTableListTraits.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/TypeFinder.h"
diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp
index a03a6fb62237..f1c70058fac2 100644
--- a/lib/IR/OptBisect.cpp
+++ b/lib/IR/OptBisect.cpp
@@ -13,12 +13,12 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/OptBisect.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/OptBisect.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index c9f957c244f8..44fe5e48c720 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Type.h"
 #include "LLVMContextImpl.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/None.h"
@@ -22,7 +23,6 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/MathExtras.h"
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
index a178b9ec0f09..b39678a013fb 100644
--- a/lib/IR/TypeFinder.cpp
+++ b/lib/IR/TypeFinder.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/TypeFinder.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -20,7 +21,6 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
-#include "llvm/IR/TypeFinder.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index 0c3946c8661e..ccdabe0817b4 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
-#include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index a8523236ac9f..5c1b3412840d 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -49,7 +49,6 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/ilist.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
@@ -59,6 +58,8 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -81,10 +82,10 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
@@ -102,7 +103,6 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index 89ddd0fc1af3..9e586465025e 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -31,6 +31,7 @@ subdirectories =
  LTO
  MC
  Object
+ BinaryFormat
  ObjectYAML
  Option
  Passes
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 92145aaf667a..9d2a44045d6a 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -315,54 +315,19 @@ InputFile::~InputFile() = default;
 Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
   std::unique_ptr<InputFile> File(new InputFile);
 
-  ErrorOr<MemoryBufferRef> BCOrErr =
-      IRObjectFile::findBitcodeInMemBuffer(Object);
-  if (!BCOrErr)
-    return errorCodeToError(BCOrErr.getError());
+  Expected<IRSymtabFile> FOrErr = readIRSymtab(Object);
+  if (!FOrErr)
+    return FOrErr.takeError();
 
-  Expected<std::vector<BitcodeModule>> BMsOrErr =
-      getBitcodeModuleList(*BCOrErr);
-  if (!BMsOrErr)
-    return BMsOrErr.takeError();
+  File->TargetTriple = FOrErr->TheReader.getTargetTriple();
+  File->SourceFileName = FOrErr->TheReader.getSourceFileName();
+  File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
+  File->ComdatTable = FOrErr->TheReader.getComdatTable();
 
-  if (BMsOrErr->empty())
-    return make_error<StringError>("Bitcode file does not contain any modules",
-                                   inconvertibleErrorCode());
-
-  File->Mods = *BMsOrErr;
-
-  LLVMContext Ctx;
-  std::vector<Module *> Mods;
-  std::vector<std::unique_ptr<Module>> OwnedMods;
-  for (auto BM : *BMsOrErr) {
-    Expected<std::unique_ptr<Module>> MOrErr =
-        BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
-                         /*IsImporting*/ false);
-    if (!MOrErr)
-      return MOrErr.takeError();
-
-    if ((*MOrErr)->getDataLayoutStr().empty())
-      return make_error<StringError>("input module has no datalayout",
-                                     inconvertibleErrorCode());
-
-    Mods.push_back(MOrErr->get());
-    OwnedMods.push_back(std::move(*MOrErr));
-  }
-
-  SmallVector<char, 0> Symtab;
-  if (Error E = irsymtab::build(Mods, Symtab, File->Strtab))
-    return std::move(E);
-
-  irsymtab::Reader R({Symtab.data(), Symtab.size()},
-                     {File->Strtab.data(), File->Strtab.size()});
-  File->TargetTriple = R.getTargetTriple();
-  File->SourceFileName = R.getSourceFileName();
-  File->COFFLinkerOpts = R.getCOFFLinkerOpts();
-  File->ComdatTable = R.getComdatTable();
-
-  for (unsigned I = 0; I != Mods.size(); ++I) {
+  for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
     size_t Begin = File->Symbols.size();
-    for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I))
+    for (const irsymtab::Reader::SymbolRef &Sym :
+         FOrErr->TheReader.module_symbols(I))
       // Skip symbols that are irrelevant to LTO. Note that this condition needs
       // to match the one in Skip() in LTO::addRegularLTO().
       if (Sym.isGlobal() && !Sym.isFormatSpecific())
@@ -370,6 +335,8 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
     File->ModuleSymIndices.push_back({Begin, File->Symbols.size()});
   }
 
+  File->Mods = FOrErr->Mods;
+  File->Strtab = std::move(FOrErr->Strtab);
   return std::move(File);
 }
 
@@ -405,10 +372,11 @@ void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym,
   if (Res.Prevailing)
     GlobalRes.IRName = Sym.getIRName();
 
-  // Set the partition to external if we know it is used elsewhere, e.g.
-  // it is visible to a regular object, is referenced from llvm.compiler_used,
-  // or was already recorded as being referenced from a different partition.
-  if (Res.VisibleToRegularObj || Sym.isUsed() ||
+  // Set the partition to external if we know the symbol is re-defined by the
+  // linker (-defsym or -wrap options) or is used elsewhere, e.g. it is visible
+  // to a regular object, is referenced from llvm.compiler_used, or was already
+  // recorded as being referenced from a different partition.
+  if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
       (GlobalRes.Partition != GlobalResolution::Unknown &&
        GlobalRes.Partition != Partition)) {
     GlobalRes.Partition = GlobalResolution::External;
@@ -439,6 +407,8 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
       OS << 'l';
     if (Res.VisibleToRegularObj)
       OS << 'x';
+    if (Res.LinkerRedefined)
+      OS << 'r';
     OS << '\n';
   }
   OS.flush();
@@ -543,6 +513,12 @@ Error LTO::addRegularLTO(BitcodeModule BM,
         if (Sym.isUndefined())
           continue;
         Keep.push_back(GV);
+        // For symbols re-defined with linker -wrap and -defsym options,
+        // set the linkage to weak to inhibit IPO. The linkage will be
+        // restored by the linker.
+        if (Res.LinkerRedefined)
+          GV->setLinkage(GlobalValue::WeakAnyLinkage);
+
         GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
         if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
           GV->setLinkage(GlobalValue::getWeakLinkage(
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 6b221a347c17..e4094d44867b 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -24,8 +24,8 @@
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
-#include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Mangler.h"
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index e86db933af3c..4d139132df46 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -13,11 +13,12 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
@@ -36,7 +37,6 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1020,18 +1020,24 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
   MCSectionELF &Section = static_cast<MCSectionELF &>(Sec);
   StringRef SectionName = Section.getSectionName();
 
+  auto &MC = Asm.getContext();
+  const auto &MAI = MC.getAsmInfo();
+
   // Compressing debug_frame requires handling alignment fragments which is
   // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow
   // for writing to arbitrary buffers) for little benefit.
   bool CompressionEnabled =
-      Asm.getContext().getAsmInfo()->compressDebugSections() !=
-      DebugCompressionType::DCT_None;
+      MAI->compressDebugSections() != DebugCompressionType::None;
   if (!CompressionEnabled || !SectionName.startswith(".debug_") ||
       SectionName == ".debug_frame") {
     Asm.writeSectionData(&Section, Layout);
     return;
   }
 
+  assert((MAI->compressDebugSections() == DebugCompressionType::Z ||
+          MAI->compressDebugSections() == DebugCompressionType::GNU) &&
+         "expected zlib or zlib-gnu style compression");
+
   SmallVector<char, 128> UncompressedData;
   raw_svector_ostream VecOS(UncompressedData);
   raw_pwrite_stream &OldStream = getStream();
@@ -1048,8 +1054,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     return;
   }
 
-  bool ZlibStyle = Asm.getContext().getAsmInfo()->compressDebugSections() ==
-                   DebugCompressionType::DCT_Zlib;
+  bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z;
   if (!maybeWriteCompression(UncompressedData.size(), CompressedContents,
                              ZlibStyle, Sec.getAlignment())) {
     getStream() << UncompressedData;
@@ -1061,8 +1066,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED);
   else
     // Add "z" prefix to section name. This is zlib-gnu style.
-    Asm.getContext().renameELFSection(&Section,
-                                      (".z" + SectionName.drop_front(1)).str());
+    MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str());
   getStream() << CompressedContents;
 }
 
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index fc0aa788f6d3..3642f37aa855 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include <cassert>
 #include <cstddef>
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index b9be685cedc4..f05904048e0b 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -13,10 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Dwarf.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 4b2001764e97..c74840982fb7 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Support/MachO.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp
index e44c08b50d76..b0dc43c6c868 100644
--- a/lib/MC/MCAsmInfoELF.cpp
+++ b/lib/MC/MCAsmInfoELF.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index c2bb7b277181..53cdaac3aa54 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -16,7 +17,6 @@
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCCodeView.h"
 #include "llvm/MC/MCContext.h"
@@ -37,9 +37,9 @@
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cstring>
 #include <cassert>
 #include <cstdint>
+#include <cstring>
 #include <tuple>
 #include <utility>
 
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 6c9a4f9f982d..92b1e12da552 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/MC/MCCodeView.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/Line.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -20,7 +21,6 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectStreamer.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/EndianStream.h"
 
 using namespace llvm;
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 4628d0ab88f3..48ee84edb096 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -7,14 +7,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCContext.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCCodeView.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFragment.h"
@@ -32,14 +34,12 @@
 #include "llvm/MC/MCSymbolWasm.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdlib>
 #include <tuple>
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index aa5072743bdf..ef1d8335e1bd 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -27,8 +27,8 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstddef>
 #include <cstring>
diff --git a/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index 5805fd7007d2..8f932a3f0d48 100644
--- a/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
-#include "llvm/Support/TargetRegistry.h"
 #include "llvm-c/Disassembler.h"
+#include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 1a320b0165fa..a2beee32f2cb 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -7,19 +7,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Config/config.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectStreamer.h"
@@ -28,7 +29,6 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index c8e0223c0573..50c1f6e79f8a 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCELFStreamer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELFStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCFragment.h"
@@ -27,10 +28,9 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 8149aa27327c..38a8af49c194 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCExpr.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
@@ -655,8 +655,12 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
         // the OS X assembler will completely drop the 4. We should probably
         // include it in the relocation or produce an error if that is not
         // possible.
+        // Allow constant expressions.
         if (!A && !B)
           return true;
+        // Allow aliases with zero offset.
+        if (Res.getConstant() == 0 && (!A || !B))
+          return true;
       }
     }
 
diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp
index 90b44177cf5e..f3d0eb55eecd 100644
--- a/lib/MC/MCFragment.cpp
+++ b/lib/MC/MCFragment.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCFragment.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCFragment.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 912179095974..9296fcedb72b 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCInstPrinter.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 566944c53548..280b5cf68c98 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include <cstdint>
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 1e9ef4163256..674c7b9bf619 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -32,8 +32,8 @@
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <vector>
 
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index d156f5d05a31..4db9a2c8d8de 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 
 using namespace llvm;
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index b685790910d0..21c5516785ef 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -10,6 +10,8 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSection.h"
@@ -17,8 +19,6 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSectionWasm.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/ELF.h"
 
 using namespace llvm;
 
@@ -241,6 +241,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
   DwarfStrSection =
       Ctx->getMachOSection("__DWARF", "__debug_str", MachO::S_ATTR_DEBUG,
                            SectionKind::getMetadata(), "info_string");
+  DwarfStrOffSection =
+      Ctx->getMachOSection("__DWARF", "__debug_str_offs", MachO::S_ATTR_DEBUG,
+                           SectionKind::getMetadata(), "section_str_off");
   DwarfLocSection =
       Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG,
                            SectionKind::getMetadata(), "section_debug_loc");
@@ -557,6 +560,11 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) {
   DwarfAccelTypesSection =
       Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0);
 
+  // String Offset and Address Sections
+  DwarfStrOffSection =
+      Ctx->getELFSection(".debug_str_offsets", DebugSecType, 0);
+  DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0);
+
   // Fission Sections
   DwarfInfoDWOSection =
       Ctx->getELFSection(".debug_info.dwo", DebugSecType, 0);
@@ -573,7 +581,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) {
       Ctx->getELFSection(".debug_loc.dwo", DebugSecType, 0);
   DwarfStrOffDWOSection =
       Ctx->getELFSection(".debug_str_offsets.dwo", DebugSecType, 0);
-  DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0);
 
   // DWP Sections
   DwarfCUIndexSection =
@@ -695,6 +702,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
       COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
           COFF::IMAGE_SCN_MEM_READ,
       SectionKind::getMetadata(), "info_string");
+  DwarfStrOffSection = Ctx->getCOFFSection(
+      ".debug_str_offsets",
+      COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+          COFF::IMAGE_SCN_MEM_READ,
+      SectionKind::getMetadata(), "section_str_off");
   DwarfLocSection = Ctx->getCOFFSection(
       ".debug_loc",
       COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -749,7 +761,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
       ".debug_str_offsets.dwo",
       COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
           COFF::IMAGE_SCN_MEM_READ,
-      SectionKind::getMetadata());
+      SectionKind::getMetadata(), "section_str_off_dwo");
   DwarfAddrSection = Ctx->getCOFFSection(
       ".debug_addr",
       COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 478b4e84e74a..98ac48a23f91 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCFragment.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFragment.h"
 #include "llvm/MC/MCSymbol.h"
 
 using namespace llvm;
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 38dadfe62135..2b963607b837 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCParser/AsmLexer.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCParser/AsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SaveAndRestore.h"
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 3b213ef4ce09..dad47e49e2c2 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -15,12 +15,13 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCCodeView.h"
 #include "llvm/MC/MCContext.h"
@@ -47,7 +48,6 @@
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -703,7 +703,7 @@ const AsmToken &AsmParser::Lex() {
   // if it's a end of statement with a comment in it
   if (getTok().is(AsmToken::EndOfStatement)) {
     // if this is a line comment output it.
-    if (getTok().getString().front() != '\n' &&
+    if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
         getTok().getString().front() != '\r' && MAI.preserveAsmComments())
       Out.addExplicitComment(Twine(getTok().getString()));
   }
@@ -1523,7 +1523,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
     Lex();
   if (Lexer.is(AsmToken::EndOfStatement)) {
     // if this is a line comment we can drop it safely
-    if (getTok().getString().front() == '\r' ||
+    if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
         getTok().getString().front() == '\n')
       Out.AddBlankLine();
     Lex();
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index bec62ccb2f7f..b83d68d4fe20 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -7,10 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCObjectFileInfo.h"
@@ -21,7 +22,6 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/SectionKind.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/SMLoc.h"
 #include <cassert>
 #include <cstdint>
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 73a7ad0500c3..f4152a9067a0 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -7,12 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCObjectFileInfo.h"
@@ -25,10 +26,9 @@
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/MachO.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstddef>
 #include <cstdint>
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 401011a027f4..f1dfb91aafbb 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -7,8 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
@@ -23,7 +24,6 @@
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SMLoc.h"
 #include <cassert>
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index 1d12ab858284..8f845ee1d76f 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SMLoc.h"
 
 using namespace llvm;
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 27b37f3e2dfb..ea36b3b9b3b2 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp
index 5f821443bb96..64ac82a6c66f 100644
--- a/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCContext.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp
index a75100a4876b..0f76c1838b51 100644
--- a/lib/MC/MCRegisterInfo.cpp
+++ b/lib/MC/MCRegisterInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index 7986c0122043..b961cb3968e8 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCSection.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFragment.h"
-#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index f0709cbc2515..72a7fc36a460 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 78fe01cca24a..a75068ebf05a 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index c9a6f12b6a58..2bfb9a63eedb 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -7,9 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCCodeView.h"
@@ -21,19 +23,17 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCWin64EH.h"
 #include "llvm/MC/MCWinEH.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
 #include <cassert>
 #include <cstdint>
+#include <cstdlib>
 #include <utility>
 
 using namespace llvm;
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 777b4e3d6b67..385cdcc62320 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index cb262542b89f..9abaaef2fe84 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFragment.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp
index ffa8260d4342..67449eb6dcf9 100644
--- a/lib/MC/MCSymbolELF.cpp
+++ b/lib/MC/MCSymbolELF.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCSymbolELF.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/Support/ELF.h"
 
 namespace llvm {
 
diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp
index 5d666b67fddb..b85e53db5d61 100644
--- a/lib/MC/MCTargetOptions.cpp
+++ b/lib/MC/MCTargetOptions.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCTargetOptions.h"
+#include "llvm/ADT/StringRef.h"
 
 using namespace llvm;
 
diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp
index a09a17d7a124..301f30d4f6ec 100644
--- a/lib/MC/MCWasmObjectTargetWriter.cpp
+++ b/lib/MC/MCWasmObjectTargetWriter.cpp
@@ -17,11 +17,5 @@ using namespace llvm;
 MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit_)
     : Is64Bit(Is64Bit_) {}
 
-bool MCWasmObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
-                                                       unsigned Type) const {
-  return false;
-}
-
-void MCWasmObjectTargetWriter::sortRelocs(
-    const MCAssembler &Asm, std::vector<WasmRelocationEntry> &Relocs) {
-}
+// Pin the vtable to this object file.
+MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default;
diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp
index 21a913999f64..a5d0f5a2cb75 100644
--- a/lib/MC/MCWinEH.cpp
+++ b/lib/MC/MCWinEH.cpp
@@ -7,14 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCWinEH.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCWinEH.h"
-#include "llvm/Support/COFF.h"
 
 namespace llvm {
 namespace WinEH {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index d9ccf0dd661f..c4e7cdbe095e 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -8,8 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
@@ -27,7 +28,6 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index a0fb33846fcf..6025a20a9c19 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/StringTableBuilder.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Support/COFF.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 51aaa4b0aa25..b68e88ca5725 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 9b2031f05043..4b3dc6e0c211 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
@@ -31,7 +32,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/StringSaver.h"
-#include "llvm/Support/Wasm.h"
 #include <vector>
 
 using namespace llvm;
@@ -127,6 +127,38 @@ struct WasmGlobal {
   uint32_t ImportIndex;
 };
 
+// Information about a single relocation.
+struct WasmRelocationEntry {
+  uint64_t Offset;            // Where is the relocation.
+  const MCSymbolWasm *Symbol; // The symbol to relocate with.
+  int64_t Addend;             // A value to add to the symbol.
+  unsigned Type;              // The type of the relocation.
+  MCSectionWasm *FixupSection; // The section the relocation is targeting.
+  // Member-wise constructor: each argument initializes the same-named field.
+  WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
+                      int64_t Addend, unsigned Type,
+                      MCSectionWasm *FixupSection)
+      : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
+        FixupSection(FixupSection) {}
+  // Returns true only for the three GLOBAL_ADDR relocation types.
+  bool hasAddend() const {
+    switch (Type) {
+    case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
+    case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
+    case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
+      return true;
+    default:
+      return false;
+    }
+  }
+  // Writes every field to Out on one line; no trailing newline is emitted.
+  void print(raw_ostream &Out) const {
+    Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend
+        << ", Type=" << Type << ", FixupSection=" << FixupSection;
+  }
+  void dump() const { print(errs()); }
+};
+
 class WasmObjectWriter : public MCObjectWriter {
   /// Helper struct for containing some precomputed information on symbols.
   struct WasmSymbolData {
@@ -146,11 +178,14 @@ class WasmObjectWriter : public MCObjectWriter {
   // Relocations for fixing up references in the data section.
   std::vector<WasmRelocationEntry> DataRelocations;
 
-  // Fixups for call_indirect type indices.
-  std::vector<WasmRelocationEntry> TypeIndexFixups;
-
   // Index values to use for fixing up call_indirect type indices.
-  std::vector<uint32_t> TypeIndexFixupTypes;
+  // Maps function symbols to the index of the type of the function
+  DenseMap<const MCSymbolWasm *, uint32_t> TypeIndices;
+  // Maps symbols to the index used by their non-type-index relocations.
+  DenseMap<const MCSymbolWasm *, uint32_t> SymbolIndices;
+  // NOTE(review): appears to map each function signature to its type index.
+  DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
+      FunctionTypeIndices;
 
   // TargetObjectWriter wrappers.
   bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
@@ -170,6 +205,15 @@ public:
 private:
   ~WasmObjectWriter() override;
 
+  void reset() override { // Clears the relocation lists and index maps.
+    CodeRelocations.clear();
+    DataRelocations.clear();
+    TypeIndices.clear();
+    SymbolIndices.clear();
+    FunctionTypeIndices.clear();
+    MCObjectWriter::reset(); // Then let the base class reset itself.
+  }
+
   void writeHeader(const MCAssembler &Asm);
 
   void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
@@ -195,21 +239,23 @@ private:
   void writeExportSection(const SmallVector<WasmExport, 4> &Exports);
   void writeElemSection(const SmallVector<uint32_t, 4> &TableElems);
   void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
-                        DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
                         const SmallVector<WasmFunction, 4> &Functions);
   uint64_t
-  writeDataSection(const SmallVector<char, 0> &DataBytes,
-                   DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices);
+  writeDataSection(const SmallVector<char, 0> &DataBytes);
   void writeNameSection(const SmallVector<WasmFunction, 4> &Functions,
                         const SmallVector<WasmImport, 4> &Imports,
                         uint32_t NumFuncImports);
-  void writeCodeRelocSection(
-      DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices);
-  void writeDataRelocSection(
-      DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
-      uint64_t DataSectionHeaderSize);
+  void writeCodeRelocSection();
+  void writeDataRelocSection(uint64_t DataSectionHeaderSize);
   void writeLinkingMetaDataSection(bool HasStackPointer,
                                    uint32_t StackPointerGlobal);
+
+  void applyRelocations(ArrayRef<WasmRelocationEntry> Relocations,
+                        uint64_t ContentsOffset);
+
+  void writeRelocations(ArrayRef<WasmRelocationEntry> Relocations,
+                        uint64_t HeaderSize);
+  uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry);
 };
 
 } // end anonymous namespace
@@ -356,19 +402,7 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
       SymA->setUsedInReloc();
   }
 
-  if (RefA) {
-    if (RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX) {
-      assert(C == 0);
-      WasmRelocationEntry Rec(FixupOffset, SymA, C,
-                              wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB,
-                              &FixupSection);
-      TypeIndexFixups.push_back(Rec);
-      return;
-    }
-  }
-
   unsigned Type = getRelocType(Ctx, Target, Fixup, IsPCRel);
-
   WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
 
   if (FixupSection.hasInstructions())
@@ -427,124 +461,95 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) {
   return Value;
 }
 
+// Return the index that a relocation refers to: the symbol index for
+// function, table and global-address relocations, or the function type index
+// for type-index relocations.
+uint32_t WasmObjectWriter::getRelocationIndexValue(
+    const WasmRelocationEntry &RelEntry) {
+  switch (RelEntry.Type) {
+  case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+  case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+  case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+  case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
+  case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
+  case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
+    assert(SymbolIndices.count(RelEntry.Symbol));
+    return SymbolIndices[RelEntry.Symbol];
+  case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
+    assert(TypeIndices.count(RelEntry.Symbol));
+    return TypeIndices[RelEntry.Symbol];
+  default:
+    llvm_unreachable("invalid relocation type");
+  }
+}
+
 // Apply the portions of the relocation records that we can handle ourselves
 // directly.
-static void ApplyRelocations(
-    ArrayRef<WasmRelocationEntry> Relocations,
-    raw_pwrite_stream &Stream,
-    DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
-    uint64_t ContentsOffset)
-{
+void WasmObjectWriter::applyRelocations(
+    ArrayRef<WasmRelocationEntry> Relocations, uint64_t ContentsOffset) {
+  raw_pwrite_stream &Stream = getStream();
   for (const WasmRelocationEntry &RelEntry : Relocations) {
     uint64_t Offset = ContentsOffset +
                       RelEntry.FixupSection->getSectionOffset() +
                       RelEntry.Offset;
-    switch (RelEntry.Type) {
-    case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: {
-      assert(SymbolIndices.count(RelEntry.Symbol));
-      uint32_t Index = SymbolIndices[RelEntry.Symbol];
-      assert(RelEntry.Addend == 0);
 
-      WritePatchableLEB(Stream, Index, Offset);
+    switch (RelEntry.Type) {
+    // Relocations with an unsigned LEB128 encoding are patched with the
+    // unsigned writer; only the SLEB variant uses the signed one.
+    case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+    case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: {
+      uint32_t Index = getRelocationIndexValue(RelEntry);
+      WritePatchableLEB(Stream, Index, Offset);
+      break;
+    }
+    case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
+      uint32_t Index = getRelocationIndexValue(RelEntry);
+      WritePatchableSLEB(Stream, Index, Offset);
       break;
     }
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
-      assert(SymbolIndices.count(RelEntry.Symbol));
-      uint32_t Index = SymbolIndices[RelEntry.Symbol];
-      assert(RelEntry.Addend == 0);
-
-      WritePatchableSLEB(Stream, Index, Offset);
+    case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: {
+      uint32_t Index = getRelocationIndexValue(RelEntry);
+      WriteI32(Stream, Index, Offset);
       break;
     }
     case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: {
       uint32_t Value = ProvisionalValue(RelEntry);
-
       WritePatchableSLEB(Stream, Value, Offset);
       break;
     }
     case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: {
       uint32_t Value = ProvisionalValue(RelEntry);
-
       WritePatchableLEB(Stream, Value, Offset);
       break;
     }
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: {
-      assert(SymbolIndices.count(RelEntry.Symbol));
-      uint32_t Index = SymbolIndices[RelEntry.Symbol];
-      assert(RelEntry.Addend == 0);
-
-      WriteI32(Stream, Index, Offset);
-      break;
-    }
     case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: {
       uint32_t Value = ProvisionalValue(RelEntry);
-
       WriteI32(Stream, Value, Offset);
       break;
     }
-    default:
-      break;
-    }
-  }
-}
-
-// Write out the portions of the relocation records that the linker will
-// need to handle.
-static void
-WriteRelocations(ArrayRef<WasmRelocationEntry> Relocations,
-                 raw_pwrite_stream &Stream,
-                 DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
-                 uint64_t HeaderSize) {
-  for (const WasmRelocationEntry RelEntry : Relocations) {
-    encodeULEB128(RelEntry.Type, Stream);
-
-    uint64_t Offset = RelEntry.Offset +
-                      RelEntry.FixupSection->getSectionOffset() + HeaderSize;
-    assert(SymbolIndices.count(RelEntry.Symbol));
-    uint32_t Index = SymbolIndices[RelEntry.Symbol];
-    int64_t Addend = RelEntry.Addend;
-
-    switch (RelEntry.Type) {
-    case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
-    case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
-      encodeULEB128(Offset, Stream);
-      encodeULEB128(Index, Stream);
-      assert(Addend == 0 && "addends not supported for functions");
-      break;
-    case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
-    case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
-    case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
-      encodeULEB128(Offset, Stream);
-      encodeULEB128(Index, Stream);
-      encodeSLEB128(Addend, Stream);
-      break;
     default:
       llvm_unreachable("unsupported relocation type");
     }
   }
 }
 
-// Write out the the type relocation records that the linker will
+// Write out the portions of the relocation records that the linker will
 // need to handle.
-static void WriteTypeRelocations(
-    ArrayRef<WasmRelocationEntry> TypeIndexFixups,
-    ArrayRef<uint32_t> TypeIndexFixupTypes,
-    raw_pwrite_stream &Stream)
-{
-  for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
-    const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
-    uint32_t Type = TypeIndexFixupTypes[i];
+void WasmObjectWriter::writeRelocations(
+    ArrayRef<WasmRelocationEntry> Relocations, uint64_t HeaderSize) {
+  raw_pwrite_stream &Stream = getStream();
+  for (const WasmRelocationEntry &RelEntry : Relocations) {
 
-    assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
-    assert(Fixup.Addend == 0);
+    uint64_t Offset = RelEntry.Offset +
+                      RelEntry.FixupSection->getSectionOffset() + HeaderSize;
+    uint32_t Index = getRelocationIndexValue(RelEntry);
 
-    uint64_t Offset = Fixup.Offset +
-                      Fixup.FixupSection->getSectionOffset();
-
-    encodeULEB128(Fixup.Type, Stream);
+    // Record layout: type, offset, index, then the addend when one applies.
+    encodeULEB128(RelEntry.Type, Stream);
     encodeULEB128(Offset, Stream);
-    encodeULEB128(Type, Stream);
+    encodeULEB128(Index, Stream);
+    if (RelEntry.hasAddend())
+      encodeSLEB128(RelEntry.Addend, Stream);
   }
 }
 
@@ -733,7 +728,6 @@ void WasmObjectWriter::writeElemSection(
 
 void WasmObjectWriter::writeCodeSection(
     const MCAssembler &Asm, const MCAsmLayout &Layout,
-    DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
     const SmallVector<WasmFunction, 4> &Functions) {
   if (Functions.empty())
     return;
@@ -768,34 +762,14 @@ void WasmObjectWriter::writeCodeSection(
     Asm.writeSectionData(&FuncSection, Layout);
   }
 
-  // Apply the type index fixups for call_indirect etc. instructions.
-  for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
-    uint32_t Type = TypeIndexFixupTypes[i];
-    unsigned Padding = PaddingFor5ByteULEB128(Type);
-
-    const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
-    assert(Fixup.Addend == 0);
-    assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
-    uint64_t Offset = Fixup.Offset +
-                      Fixup.FixupSection->getSectionOffset();
-
-    uint8_t Buffer[16];
-    unsigned SizeLen = encodeULEB128(Type, Buffer, Padding);
-    assert(SizeLen == 5);
-    getStream().pwrite((char *)Buffer, SizeLen,
-                       Section.ContentsOffset + Offset);
-  }
-
   // Apply fixups.
-  ApplyRelocations(CodeRelocations, getStream(), SymbolIndices,
-                   Section.ContentsOffset);
+  applyRelocations(CodeRelocations, Section.ContentsOffset);
 
   endSection(Section);
 }
 
 uint64_t WasmObjectWriter::writeDataSection(
-    const SmallVector<char, 0> &DataBytes,
-    DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) {
+    const SmallVector<char, 0> &DataBytes) {
   if (DataBytes.empty())
     return 0;
 
@@ -812,8 +786,7 @@ uint64_t WasmObjectWriter::writeDataSection(
   writeBytes(DataBytes); // data
 
   // Apply fixups.
-  ApplyRelocations(DataRelocations, getStream(), SymbolIndices,
-                   Section.ContentsOffset + HeaderSize);
+  applyRelocations(DataRelocations, Section.ContentsOffset + HeaderSize);
 
   endSection(Section);
   return HeaderSize;
@@ -853,8 +826,7 @@ void WasmObjectWriter::writeNameSection(
   endSection(Section);
 }
 
-void WasmObjectWriter::writeCodeRelocSection(
-    DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) {
+void WasmObjectWriter::writeCodeRelocSection() {
   // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
   // for descriptions of the reloc sections.
 
@@ -865,17 +837,14 @@ void WasmObjectWriter::writeCodeRelocSection(
   startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE");
 
   encodeULEB128(wasm::WASM_SEC_CODE, getStream());
-  encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream());
+  encodeULEB128(CodeRelocations.size(), getStream());
 
-  WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0);
-  WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
+  writeRelocations(CodeRelocations, 0);
 
   endSection(Section);
 }
 
-void WasmObjectWriter::writeDataRelocSection(
-    DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
-    uint64_t DataSectionHeaderSize) {
+void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) {
   // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
   // for descriptions of the reloc sections.
 
@@ -888,8 +857,7 @@ void WasmObjectWriter::writeDataRelocSection(
   encodeULEB128(wasm::WASM_SEC_DATA, getStream());
   encodeULEB128(DataRelocations.size(), getStream());
 
-  WriteRelocations(DataRelocations, getStream(), SymbolIndices,
-                   DataSectionHeaderSize);
+  writeRelocations(DataRelocations, DataSectionHeaderSize);
 
   endSection(Section);
 }
@@ -915,15 +883,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
   wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32;
 
   // Collect information from the available symbols.
-  DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
-      FunctionTypeIndices;
   SmallVector<WasmFunctionType, 4> FunctionTypes;
   SmallVector<WasmFunction, 4> Functions;
   SmallVector<uint32_t, 4> TableElems;
   SmallVector<WasmGlobal, 4> Globals;
   SmallVector<WasmImport, 4> Imports;
   SmallVector<WasmExport, 4> Exports;
-  DenseMap<const MCSymbolWasm *, uint32_t> SymbolIndices;
   SmallPtrSet<const MCSymbolWasm *, 4> IsAddressTaken;
   unsigned NumFuncImports = 0;
   unsigned NumGlobalImports = 0;
@@ -1194,9 +1159,9 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
   }
 
   // Add types for indirect function calls.
-  for (const WasmRelocationEntry &Fixup : TypeIndexFixups) {
-    assert(Fixup.Addend == 0);
-    assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
+  for (const WasmRelocationEntry &Fixup : CodeRelocations) {
+    if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB)
+      continue;
 
     WasmFunctionType F;
     F.Returns = Fixup.Symbol->getReturns();
@@ -1206,7 +1171,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
     if (Pair.second)
       FunctionTypes.push_back(F);
 
-    TypeIndexFixupTypes.push_back(Pair.first->second);
+    TypeIndices[Fixup.Symbol] = Pair.first->second;
   }
 
   // Write out the Wasm header.
@@ -1221,11 +1186,11 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
   writeExportSection(Exports);
   // TODO: Start Section
   writeElemSection(TableElems);
-  writeCodeSection(Asm, Layout, SymbolIndices, Functions);
-  uint64_t DataSectionHeaderSize = writeDataSection(DataBytes, SymbolIndices);
+  writeCodeSection(Asm, Layout, Functions);
+  uint64_t DataSectionHeaderSize = writeDataSection(DataBytes);
   writeNameSection(Functions, Imports, NumFuncImports);
-  writeCodeRelocSection(SymbolIndices);
-  writeDataRelocSection(SymbolIndices, DataSectionHeaderSize);
+  writeCodeRelocSection();
+  writeDataRelocSection(DataSectionHeaderSize);
   writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal);
 
   // TODO: Translate the .comment section to the output.
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index e99a548ac001..53dee3e8b9f3 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -12,11 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -32,13 +33,12 @@
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/JamCRC.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm> 
+#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index c26d87f36f83..b4d0d7a87f1d 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
@@ -28,11 +29,10 @@
 #include "llvm/MC/MCSymbolCOFF.h"
 #include "llvm/MC/MCWinCOFFStreamer.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index c4924f85a907..977cccc11dcd 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/Archive.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/Object/Archive.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Support/Chrono.h"
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index 5b233aab2018..e1c35ed6a6a0 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Object/ArchiveWriter.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ObjectFile.h"
@@ -290,7 +291,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
     MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef();
     Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
         object::SymbolicFile::createSymbolicFile(
-            MemberBuffer, sys::fs::file_magic::unknown, &Context);
+            MemberBuffer, llvm::file_magic::unknown, &Context);
     if (!ObjOrErr) {
       // FIXME: check only for "not an object file" errors.
       consumeError(ObjOrErr.takeError());
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index 116af3c917be..c4565db459e6 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -11,9 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Object/Archive.h"
 #include "llvm/Object/Binary.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
@@ -43,41 +44,41 @@ MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; }
 
 Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
                                                       LLVMContext *Context) {
-  sys::fs::file_magic Type = sys::fs::identify_magic(Buffer.getBuffer());
+  file_magic Type = identify_magic(Buffer.getBuffer());
 
   switch (Type) {
-    case sys::fs::file_magic::archive:
-      return Archive::create(Buffer);
-    case sys::fs::file_magic::elf:
-    case sys::fs::file_magic::elf_relocatable:
-    case sys::fs::file_magic::elf_executable:
-    case sys::fs::file_magic::elf_shared_object:
-    case sys::fs::file_magic::elf_core:
-    case sys::fs::file_magic::macho_object:
-    case sys::fs::file_magic::macho_executable:
-    case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib:
-    case sys::fs::file_magic::macho_core:
-    case sys::fs::file_magic::macho_preload_executable:
-    case sys::fs::file_magic::macho_dynamically_linked_shared_lib:
-    case sys::fs::file_magic::macho_dynamic_linker:
-    case sys::fs::file_magic::macho_bundle:
-    case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
-    case sys::fs::file_magic::macho_dsym_companion:
-    case sys::fs::file_magic::macho_kext_bundle:
-    case sys::fs::file_magic::coff_object:
-    case sys::fs::file_magic::coff_import_library:
-    case sys::fs::file_magic::pecoff_executable:
-    case sys::fs::file_magic::bitcode:
-    case sys::fs::file_magic::wasm_object:
-      return ObjectFile::createSymbolicFile(Buffer, Type, Context);
-    case sys::fs::file_magic::macho_universal_binary:
-      return MachOUniversalBinary::create(Buffer);
-    case sys::fs::file_magic::windows_resource:
-      return WindowsResource::createWindowsResource(Buffer);
-    case sys::fs::file_magic::unknown:
-    case sys::fs::file_magic::coff_cl_gl_object:
-      // Unrecognized object file format.
-      return errorCodeToError(object_error::invalid_file_type);
+  case file_magic::archive:
+    return Archive::create(Buffer);
+  case file_magic::elf:
+  case file_magic::elf_relocatable:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object:
+  case file_magic::elf_core:
+  case file_magic::macho_object:
+  case file_magic::macho_executable:
+  case file_magic::macho_fixed_virtual_memory_shared_lib:
+  case file_magic::macho_core:
+  case file_magic::macho_preload_executable:
+  case file_magic::macho_dynamically_linked_shared_lib:
+  case file_magic::macho_dynamic_linker:
+  case file_magic::macho_bundle:
+  case file_magic::macho_dynamically_linked_shared_lib_stub:
+  case file_magic::macho_dsym_companion:
+  case file_magic::macho_kext_bundle:
+  case file_magic::coff_object:
+  case file_magic::coff_import_library:
+  case file_magic::pecoff_executable:
+  case file_magic::bitcode:
+  case file_magic::wasm_object:
+    return ObjectFile::createSymbolicFile(Buffer, Type, Context);
+  case file_magic::macho_universal_binary:
+    return MachOUniversalBinary::create(Buffer);
+  case file_magic::windows_resource:
+    return WindowsResource::createWindowsResource(Buffer);
+  case file_magic::unknown:
+  case file_magic::coff_cl_gl_object:
+    // Unrecognized object file format.
+    return errorCodeToError(object_error::invalid_file_type);
   }
   llvm_unreachable("Unexpected Binary File Type");
 }
diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp
index 37962d84d855..740bf94d40e0 100644
--- a/lib/Object/COFFImportFile.cpp
+++ b/lib/Object/COFFImportFile.cpp
@@ -285,11 +285,13 @@ ObjectFactory::createImportDescriptor(std::vector<uint8_t> &Buffer) {
        IMAGE_SYM_CLASS_EXTERNAL,
        0},
   };
-  reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
+  // TODO: Name.Offset.Offset here and in all similar places below
+  // suggests a naming refactoring. Maybe StringTableOffset.Value?
+  SymbolTable[0].Name.Offset.Offset =
       sizeof(uint32_t);
-  reinterpret_cast<StringTableOffset &>(SymbolTable[5].Name).Offset =
+  SymbolTable[5].Name.Offset.Offset =
       sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1;
-  reinterpret_cast<StringTableOffset &>(SymbolTable[6].Name).Offset =
+  SymbolTable[6].Name.Offset.Offset =
       sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 +
       NullImportDescriptorSymbolName.length() + 1;
   append(Buffer, SymbolTable);
@@ -354,8 +356,7 @@ ObjectFactory::createNullImportDescriptor(std::vector<uint8_t> &Buffer) {
        IMAGE_SYM_CLASS_EXTERNAL,
        0},
   };
-  reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
-      sizeof(uint32_t);
+  SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t);
   append(Buffer, SymbolTable);
 
   // String Table
@@ -437,8 +438,7 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector<uint8_t> &Buffer) {
        IMAGE_SYM_CLASS_EXTERNAL,
        0},
   };
-  reinterpret_cast<StringTableOffset &>(SymbolTable[0].Name).Offset =
-      sizeof(uint32_t);
+  SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t);
   append(Buffer, SymbolTable);
 
   // String Table
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 7372f24cb9a8..579c8dde366a 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -15,12 +15,12 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp
index 89d199a3f3f6..53f084d7620e 100644
--- a/lib/Object/Decompressor.cpp
+++ b/lib/Object/Decompressor.cpp
@@ -8,11 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/Decompressor.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Endian.h"
-#include "llvm/Support/ELF.h"
 
 using namespace llvm;
 using namespace llvm::support::endian;
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index 5798a3540f53..9bc28dc14a29 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/ELF.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/BinaryFormat/ELF.h"
 
 using namespace llvm;
 using namespace object;
@@ -24,7 +24,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
   switch (Machine) {
   case ELF::EM_X86_64:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/x86_64.def"
+#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
     default:
       break;
     }
@@ -32,77 +32,77 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
   case ELF::EM_386:
   case ELF::EM_IAMCU:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/i386.def"
+#include "llvm/BinaryFormat/ELFRelocs/i386.def"
     default:
       break;
     }
     break;
   case ELF::EM_MIPS:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/Mips.def"
+#include "llvm/BinaryFormat/ELFRelocs/Mips.def"
     default:
       break;
     }
     break;
   case ELF::EM_AARCH64:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/AArch64.def"
+#include "llvm/BinaryFormat/ELFRelocs/AArch64.def"
     default:
       break;
     }
     break;
   case ELF::EM_ARM:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/ARM.def"
+#include "llvm/BinaryFormat/ELFRelocs/ARM.def"
     default:
       break;
     }
     break;
   case ELF::EM_AVR:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/AVR.def"
+#include "llvm/BinaryFormat/ELFRelocs/AVR.def"
     default:
       break;
     }
     break;
   case ELF::EM_HEXAGON:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/Hexagon.def"
+#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def"
     default:
       break;
     }
     break;
   case ELF::EM_LANAI:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/Lanai.def"
+#include "llvm/BinaryFormat/ELFRelocs/Lanai.def"
     default:
       break;
     }
     break;
   case ELF::EM_PPC:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/PowerPC.def"
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def"
     default:
       break;
     }
     break;
   case ELF::EM_PPC64:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/PowerPC64.def"
+#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
     default:
       break;
     }
     break;
   case ELF::EM_RISCV:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/RISCV.def"
+#include "llvm/BinaryFormat/ELFRelocs/RISCV.def"
     default:
       break;
     }
     break;
   case ELF::EM_S390:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/SystemZ.def"
+#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def"
     default:
       break;
     }
@@ -111,27 +111,27 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
   case ELF::EM_SPARC32PLUS:
   case ELF::EM_SPARCV9:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/Sparc.def"
+#include "llvm/BinaryFormat/ELFRelocs/Sparc.def"
     default:
       break;
     }
     break;
   case ELF::EM_WEBASSEMBLY:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/WebAssembly.def"
+#include "llvm/BinaryFormat/ELFRelocs/WebAssembly.def"
     default:
       break;
     }
     break;
   case ELF::EM_AMDGPU:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/AMDGPU.def"
+#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def"
     default:
       break;
     }
   case ELF::EM_BPF:
     switch (Type) {
-#include "llvm/Support/ELFRelocs/BPF.def"
+#include "llvm/BinaryFormat/ELFRelocs/BPF.def"
     default:
       break;
     }
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 86f033bb6cbf..fa136d782b5a 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -11,15 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/ELFObjectFile.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/ELF.h"
-#include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/ELFTypes.h"
 #include "llvm/Object/Error.h"
-#include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/ARMAttributeParser.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index adbf0de6d1bc..e7807b038335 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Object/IRObjectFile.h"
 #include "RecordStreamer.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/IR/GVMaterializer.h"
 #include "llvm/IR/LLVMContext.h"
@@ -95,13 +96,13 @@ ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj
 }
 
 ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) {
-  sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer());
+  file_magic Type = identify_magic(Object.getBuffer());
   switch (Type) {
-  case sys::fs::file_magic::bitcode:
+  case file_magic::bitcode:
     return Object;
-  case sys::fs::file_magic::elf_relocatable:
-  case sys::fs::file_magic::macho_object:
-  case sys::fs::file_magic::coff_object: {
+  case file_magic::elf_relocatable:
+  case file_magic::macho_object:
+  case file_magic::coff_object: {
     Expected<std::unique_ptr<ObjectFile>> ObjFile =
         ObjectFile::createObjectFile(Object, Type);
     if (!ObjFile)
@@ -138,3 +139,25 @@ IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) {
   return std::unique_ptr<IRObjectFile>(
       new IRObjectFile(*BCOrErr, std::move(Mods)));
 }
+
+Expected<IRSymtabFile> object::readIRSymtab(MemoryBufferRef MBRef) {
+  IRSymtabFile F;
+  ErrorOr<MemoryBufferRef> BCOrErr =
+      IRObjectFile::findBitcodeInMemBuffer(MBRef);
+  if (!BCOrErr)
+    return errorCodeToError(BCOrErr.getError());
+
+  Expected<BitcodeFileContents> BFCOrErr = getBitcodeFileContents(*BCOrErr);
+  if (!BFCOrErr)
+    return BFCOrErr.takeError();
+
+  Expected<irsymtab::FileContents> FCOrErr = irsymtab::readBitcode(*BFCOrErr);
+  if (!FCOrErr)
+    return FCOrErr.takeError();
+
+  F.Mods = std::move(BFCOrErr->Mods);
+  F.Symtab = std::move(FCOrErr->Symtab);
+  F.Strtab = std::move(FCOrErr->Strtab);
+  F.TheReader = std::move(FCOrErr->TheReader);
+  return std::move(F);
+}
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index 5f0837882d60..d21acdb1d556 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/IRSymtab.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -22,15 +23,16 @@
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Object/IRSymtab.h"
+#include "llvm/Object/IRObjectFile.h"
 #include "llvm/Object/ModuleSymbolTable.h"
 #include "llvm/Object/SymbolicFile.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/StringSaver.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <string>
 #include <utility>
@@ -88,6 +90,10 @@ struct Builder {
 };
 
 Error Builder::addModule(Module *M) {
+  if (M->getDataLayoutStr().empty())
+    return make_error<StringError>("input module has no datalayout",
+                                   inconvertibleErrorCode());
+
   SmallPtrSet<GlobalValue *, 8> Used;
   collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);
 
@@ -259,3 +265,40 @@ Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
                       SmallVector<char, 0> &Strtab) {
   return Builder(Symtab, Strtab).build(Mods);
 }
+
+// Upgrade a vector of bitcode modules created by an old version of LLVM by
+// creating an irsymtab for them in the current format.
+static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) {
+  FileContents FC;
+
+  LLVMContext Ctx;
+  std::vector<Module *> Mods;
+  std::vector<std::unique_ptr<Module>> OwnedMods;
+  for (auto BM : BMs) {
+    Expected<std::unique_ptr<Module>> MOrErr =
+        BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
+                         /*IsImporting*/ false);
+    if (!MOrErr)
+      return MOrErr.takeError();
+
+    Mods.push_back(MOrErr->get());
+    OwnedMods.push_back(std::move(*MOrErr));
+  }
+
+  if (Error E = build(Mods, FC.Symtab, FC.Strtab))
+    return std::move(E);
+
+  FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()},
+                  {FC.Strtab.data(), FC.Strtab.size()}};
+  return std::move(FC);
+}
+
+Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) {
+  if (BFC.Mods.empty())
+    return make_error<StringError>("Bitcode file does not contain any modules",
+                                   inconvertibleErrorCode());
+
+  // Right now we have no on-disk representation of symbol tables, so we always
+  // upgrade.
+  return upgrade(BFC.Mods);
+}
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
index bae578c76f7e..687713bab6a2 100644
--- a/lib/Object/LLVMBuild.txt
+++ b/lib/Object/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = Object
 parent = Libraries
-required_libraries = BitReader Core MC MCParser Support
+required_libraries = BitReader Core MC BinaryFormat MCParser Support
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 084159a61f55..7804bbe06f83 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -14,13 +14,14 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
@@ -32,10 +33,9 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -4314,3 +4314,9 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer,
   return make_error<GenericBinaryError>("Unrecognized MachO magic number",
                                         object_error::invalid_file_type);
 }
+
+StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const {
+  return StringSwitch<StringRef>(Name)
+      .Case("debug_str_offs", "debug_str_offsets")
+      .Default(Name);
+}
diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp
index a5b42725d817..f2e7a218c13a 100644
--- a/lib/Object/ModuleSymbolTable.cpp
+++ b/lib/Object/ModuleSymbolTable.cpp
@@ -13,9 +13,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/ModuleSymbolTable.h"
 #include "RecordStreamer.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
@@ -36,16 +37,15 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Object/ModuleSymbolTable.h"
 #include "llvm/Object/SymbolicFile.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 6df481b060e1..1d2859cfbe9d 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm-c/Object.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Object/ObjectFile.h"
 
 using namespace llvm;
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 1f60e7157bd9..8377dd0d73fa 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -11,12 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/Wasm.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -79,42 +80,42 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
 }
 
 Expected<std::unique_ptr<ObjectFile>>
-ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) {
+ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
   StringRef Data = Object.getBuffer();
-  if (Type == sys::fs::file_magic::unknown)
-    Type = sys::fs::identify_magic(Data);
+  if (Type == file_magic::unknown)
+    Type = identify_magic(Data);
 
   switch (Type) {
-  case sys::fs::file_magic::unknown:
-  case sys::fs::file_magic::bitcode:
-  case sys::fs::file_magic::coff_cl_gl_object:
-  case sys::fs::file_magic::archive:
-  case sys::fs::file_magic::macho_universal_binary:
-  case sys::fs::file_magic::windows_resource:
+  case file_magic::unknown:
+  case file_magic::bitcode:
+  case file_magic::coff_cl_gl_object:
+  case file_magic::archive:
+  case file_magic::macho_universal_binary:
+  case file_magic::windows_resource:
     return errorCodeToError(object_error::invalid_file_type);
-  case sys::fs::file_magic::elf:
-  case sys::fs::file_magic::elf_relocatable:
-  case sys::fs::file_magic::elf_executable:
-  case sys::fs::file_magic::elf_shared_object:
-  case sys::fs::file_magic::elf_core:
+  case file_magic::elf:
+  case file_magic::elf_relocatable:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object:
+  case file_magic::elf_core:
     return errorOrToExpected(createELFObjectFile(Object));
-  case sys::fs::file_magic::macho_object:
-  case sys::fs::file_magic::macho_executable:
-  case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib:
-  case sys::fs::file_magic::macho_core:
-  case sys::fs::file_magic::macho_preload_executable:
-  case sys::fs::file_magic::macho_dynamically_linked_shared_lib:
-  case sys::fs::file_magic::macho_dynamic_linker:
-  case sys::fs::file_magic::macho_bundle:
-  case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
-  case sys::fs::file_magic::macho_dsym_companion:
-  case sys::fs::file_magic::macho_kext_bundle:
+  case file_magic::macho_object:
+  case file_magic::macho_executable:
+  case file_magic::macho_fixed_virtual_memory_shared_lib:
+  case file_magic::macho_core:
+  case file_magic::macho_preload_executable:
+  case file_magic::macho_dynamically_linked_shared_lib:
+  case file_magic::macho_dynamic_linker:
+  case file_magic::macho_bundle:
+  case file_magic::macho_dynamically_linked_shared_lib_stub:
+  case file_magic::macho_dsym_companion:
+  case file_magic::macho_kext_bundle:
     return createMachOObjectFile(Object);
-  case sys::fs::file_magic::coff_object:
-  case sys::fs::file_magic::coff_import_library:
-  case sys::fs::file_magic::pecoff_executable:
+  case file_magic::coff_object:
+  case file_magic::coff_import_library:
+  case file_magic::pecoff_executable:
     return errorOrToExpected(createCOFFObjectFile(Object));
-  case sys::fs::file_magic::wasm_object:
+  case file_magic::wasm_object:
     return createWasmObjectFile(Object);
   }
   llvm_unreachable("Unexpected Object File Type");
diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp
index 16cff5c228bd..1042d29d2350 100644
--- a/lib/Object/SymbolicFile.cpp
+++ b/lib/Object/SymbolicFile.cpp
@@ -11,12 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Object/SymbolicFile.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Object/COFFImportFile.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/IRObjectFile.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/SymbolicFile.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -34,45 +35,46 @@ SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source)
 
 SymbolicFile::~SymbolicFile() = default;
 
-Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
-    MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) {
+Expected<std::unique_ptr<SymbolicFile>>
+SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
+                                 LLVMContext *Context) {
   StringRef Data = Object.getBuffer();
-  if (Type == sys::fs::file_magic::unknown)
-    Type = sys::fs::identify_magic(Data);
+  if (Type == file_magic::unknown)
+    Type = identify_magic(Data);
 
   switch (Type) {
-  case sys::fs::file_magic::bitcode:
+  case file_magic::bitcode:
     if (Context)
       return IRObjectFile::create(Object, *Context);
     LLVM_FALLTHROUGH;
-  case sys::fs::file_magic::unknown:
-  case sys::fs::file_magic::archive:
-  case sys::fs::file_magic::coff_cl_gl_object:
-  case sys::fs::file_magic::macho_universal_binary:
-  case sys::fs::file_magic::windows_resource:
+  case file_magic::unknown:
+  case file_magic::archive:
+  case file_magic::coff_cl_gl_object:
+  case file_magic::macho_universal_binary:
+  case file_magic::windows_resource:
     return errorCodeToError(object_error::invalid_file_type);
-  case sys::fs::file_magic::elf:
-  case sys::fs::file_magic::elf_executable:
-  case sys::fs::file_magic::elf_shared_object:
-  case sys::fs::file_magic::elf_core:
-  case sys::fs::file_magic::macho_executable:
-  case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib:
-  case sys::fs::file_magic::macho_core:
-  case sys::fs::file_magic::macho_preload_executable:
-  case sys::fs::file_magic::macho_dynamically_linked_shared_lib:
-  case sys::fs::file_magic::macho_dynamic_linker:
-  case sys::fs::file_magic::macho_bundle:
-  case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
-  case sys::fs::file_magic::macho_dsym_companion:
-  case sys::fs::file_magic::macho_kext_bundle:
-  case sys::fs::file_magic::pecoff_executable:
-  case sys::fs::file_magic::wasm_object:
+  case file_magic::elf:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object:
+  case file_magic::elf_core:
+  case file_magic::macho_executable:
+  case file_magic::macho_fixed_virtual_memory_shared_lib:
+  case file_magic::macho_core:
+  case file_magic::macho_preload_executable:
+  case file_magic::macho_dynamically_linked_shared_lib:
+  case file_magic::macho_dynamic_linker:
+  case file_magic::macho_bundle:
+  case file_magic::macho_dynamically_linked_shared_lib_stub:
+  case file_magic::macho_dsym_companion:
+  case file_magic::macho_kext_bundle:
+  case file_magic::pecoff_executable:
+  case file_magic::wasm_object:
     return ObjectFile::createObjectFile(Object, Type);
-  case sys::fs::file_magic::coff_import_library:
+  case file_magic::coff_import_library:
     return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object));
-  case sys::fs::file_magic::elf_relocatable:
-  case sys::fs::file_magic::macho_object:
-  case sys::fs::file_magic::coff_object: {
+  case file_magic::elf_relocatable:
+  case file_magic::macho_object:
+  case file_magic::coff_object: {
     Expected<std::unique_ptr<ObjectFile>> Obj =
         ObjectFile::createObjectFile(Object, Type);
     if (!Obj || !Context)
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index f565d7a33e55..2304098c1dc9 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -11,6 +11,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
@@ -21,7 +22,6 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Support/Wasm.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -830,7 +830,7 @@ void WasmObjectFile::getRelocationTypeName(
     break;
 
   switch (Rel.Type) {
-#include "llvm/Support/WasmRelocs/WebAssembly.def"
+#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def"
   }
 
 #undef WASM_RELOC
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index e46d38e466a0..041659e7aa23 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -12,7 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/WindowsResource.h"
-#include "llvm/Support/COFF.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MathExtras.h"
+#include <ctime>
+#include <queue>
 #include <sstream>
 #include <system_error>
 
@@ -29,6 +34,9 @@ static const size_t ResourceMagicSize = 16;
 
 static const size_t NullEntrySize = 16;
 
+uint32_t WindowsResourceParser::TreeNode::StringCount = 0;
+uint32_t WindowsResourceParser::TreeNode::DataCount = 0;
+
 WindowsResource::WindowsResource(MemoryBufferRef Source)
     : Binary(Binary::ID_WinRes, Source) {
   size_t LeadingSize = ResourceMagicSize + NullEntrySize;
@@ -115,7 +123,7 @@ Error ResourceEntryRef::loadNext() {
   return Error::success();
 }
 
-WindowsResourceParser::WindowsResourceParser() {}
+WindowsResourceParser::WindowsResourceParser() : Root(false) {}
 
 Error WindowsResourceParser::parse(WindowsResource *WR) {
   auto EntryOrErr = WR->getHeadEntry();
@@ -124,9 +132,16 @@ Error WindowsResourceParser::parse(WindowsResource *WR) {
 
   ResourceEntryRef Entry = EntryOrErr.get();
   bool End = false;
-
   while (!End) {
 
+    Data.push_back(Entry.getData());
+
+    if (Entry.checkTypeString())
+      StringTable.push_back(Entry.getTypeString());
+
+    if (Entry.checkNameString())
+      StringTable.push_back(Entry.getNameString());
+
     Root.addEntry(Entry);
 
     RETURN_IF_ERROR(Entry.moveNext(End));
@@ -146,8 +161,37 @@ void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry) {
   NameNode.addLanguageNode(Entry);
 }
 
-WindowsResourceParser::TreeNode::TreeNode(ArrayRef<UTF16> NameRef)
-    : Name(NameRef) {}
+WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) {
+  if (IsStringNode)
+    StringIndex = StringCount++;
+}
+
+WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion,
+                                          uint16_t MinorVersion,
+                                          uint32_t Characteristics)
+    : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion),
+      Characteristics(Characteristics) {
+  if (IsDataNode)
+    DataIndex = DataCount++;
+}
+
+std::unique_ptr<WindowsResourceParser::TreeNode>
+WindowsResourceParser::TreeNode::createStringNode() {
+  return std::unique_ptr<TreeNode>(new TreeNode(true));
+}
+
+std::unique_ptr<WindowsResourceParser::TreeNode>
+WindowsResourceParser::TreeNode::createIDNode() {
+  return std::unique_ptr<TreeNode>(new TreeNode(false));
+}
+
+std::unique_ptr<WindowsResourceParser::TreeNode>
+WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion,
+                                                uint16_t MinorVersion,
+                                                uint32_t Characteristics) {
+  return std::unique_ptr<TreeNode>(
+      new TreeNode(MajorVersion, MinorVersion, Characteristics));
+}
 
 WindowsResourceParser::TreeNode &
 WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry) {
@@ -168,14 +212,18 @@ WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry) {
 WindowsResourceParser::TreeNode &
 WindowsResourceParser::TreeNode::addLanguageNode(
     const ResourceEntryRef &Entry) {
-  return addChild(Entry.getLanguage());
+  return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(),
+                  Entry.getMinorVersion(), Entry.getCharacteristics());
 }
 
-WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addChild(uint32_t ID) {
+WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild(
+    uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion,
+    uint32_t Characteristics) {
   auto Child = IDChildren.find(ID);
   if (Child == IDChildren.end()) {
-    auto NewChild = llvm::make_unique<WindowsResourceParser::TreeNode>(ID);
+    auto NewChild =
+        IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics)
+                   : createIDNode();
     WindowsResourceParser::TreeNode &Node = *NewChild;
     IDChildren.emplace(ID, std::move(NewChild));
     return Node;
@@ -199,7 +247,7 @@ WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef) {
 
   auto Child = StringChildren.find(NameString);
   if (Child == StringChildren.end()) {
-    auto NewChild = llvm::make_unique<WindowsResourceParser::TreeNode>(NameRef);
+    auto NewChild = createStringNode();
     WindowsResourceParser::TreeNode &Node = *NewChild;
     StringChildren.emplace(NameString, std::move(NewChild));
     return Node;
@@ -218,5 +266,455 @@ void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer,
   }
 }
 
+// This function returns the size of the entire resource tree, including
+// directory tables, directory entries, and data entries.  It does not include
+// the directory strings or the relocations of the .rsrc section.
+uint32_t WindowsResourceParser::TreeNode::getTreeSize() const {
+  uint32_t Size = (IDChildren.size() + StringChildren.size()) *
+                  sizeof(llvm::object::coff_resource_dir_entry);
+
+  // Reached a node pointing to a data entry.
+  if (IsDataNode) {
+    Size += sizeof(llvm::object::coff_resource_data_entry);
+    return Size;
+  }
+
+  // If the node does not point to data, it must have a directory table pointing
+  // to other nodes.
+  Size += sizeof(llvm::object::coff_resource_dir_table);
+
+  for (auto const &Child : StringChildren) {
+    Size += Child.second->getTreeSize();
+  }
+  for (auto const &Child : IDChildren) {
+    Size += Child.second->getTreeSize();
+  }
+  return Size;
+}
+
+class WindowsResourceCOFFWriter {
+public:
+  WindowsResourceCOFFWriter(StringRef OutputFile, Machine MachineType,
+                            const WindowsResourceParser &Parser, Error &E);
+
+  Error write();
+
+private:
+  void performFileLayout();
+  void performSectionOneLayout();
+  void performSectionTwoLayout();
+  void writeCOFFHeader();
+  void writeFirstSectionHeader();
+  void writeSecondSectionHeader();
+  void writeFirstSection();
+  void writeSecondSection();
+  void writeSymbolTable();
+  void writeStringTable();
+  void writeDirectoryTree();
+  void writeDirectoryStringTable();
+  void writeFirstSectionRelocations();
+  std::unique_ptr<FileOutputBuffer> Buffer;
+  uint8_t *Current;
+  Machine MachineType;
+  const WindowsResourceParser::TreeNode &Resources;
+  const ArrayRef<std::vector<uint8_t>> Data;
+  uint64_t FileSize;
+  uint32_t SymbolTableOffset;
+  uint32_t SectionOneSize;
+  uint32_t SectionOneOffset;
+  uint32_t SectionOneRelocations;
+  uint32_t SectionTwoSize;
+  uint32_t SectionTwoOffset;
+  const ArrayRef<std::vector<UTF16>> StringTable;
+  std::vector<uint32_t> StringTableOffsets;
+  std::vector<uint32_t> DataOffsets;
+  std::vector<uint32_t> RelocationAddresses;
+};
+
+WindowsResourceCOFFWriter::WindowsResourceCOFFWriter(
+    StringRef OutputFile, Machine MachineType,
+    const WindowsResourceParser &Parser, Error &E)
+    : MachineType(MachineType), Resources(Parser.getTree()),
+      Data(Parser.getData()), StringTable(Parser.getStringTable()) {
+  performFileLayout();
+
+  ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+      FileOutputBuffer::create(OutputFile, FileSize);
+  if (!BufferOrErr) {
+    E = errorCodeToError(BufferOrErr.getError());
+    return;
+  }
+
+  Buffer = std::move(*BufferOrErr);
+}
+
+void WindowsResourceCOFFWriter::performFileLayout() {
+  // Add size of COFF header.
+  FileSize = llvm::COFF::Header16Size;
+
+  // one .rsrc section header for directory tree, another for resource data.
+  FileSize += 2 * llvm::COFF::SectionSize;
+
+  performSectionOneLayout();
+  performSectionTwoLayout();
+
+  // We have reached the address of the symbol table.
+  SymbolTableOffset = FileSize;
+
+  FileSize += llvm::COFF::Symbol16Size;     // size of the @feat.00 symbol.
+  FileSize += 4 * llvm::COFF::Symbol16Size; // symbol + aux for each section.
+  FileSize += Data.size() * llvm::COFF::Symbol16Size; // 1 symbol per resource.
+  FileSize += 4; // four null bytes for the string table.
+}
+
+void WindowsResourceCOFFWriter::performSectionOneLayout() {
+  SectionOneOffset = FileSize;
+
+  SectionOneSize = Resources.getTreeSize();
+  uint32_t CurrentStringOffset = SectionOneSize;
+  uint32_t TotalStringTableSize = 0;
+  for (auto const &String : StringTable) {
+    StringTableOffsets.push_back(CurrentStringOffset);
+    uint32_t StringSize = String.size() * sizeof(UTF16) + sizeof(uint16_t);
+    CurrentStringOffset += StringSize;
+    TotalStringTableSize += StringSize;
+  }
+  SectionOneSize += alignTo(TotalStringTableSize, sizeof(uint32_t));
+
+  // account for the relocations of section one.
+  SectionOneRelocations = FileSize + SectionOneSize;
+  FileSize += SectionOneSize;
+  FileSize += Data.size() *
+              llvm::COFF::RelocationSize; // one relocation for each resource.
+}
+
+void WindowsResourceCOFFWriter::performSectionTwoLayout() {
+  // Add size of .rsrc$02 section, which contains all resource data on 8-byte
+  // alignment.
+  SectionTwoOffset = FileSize;
+  SectionTwoSize = 0;
+  for (auto const &Entry : Data) {
+    DataOffsets.push_back(SectionTwoSize);
+    SectionTwoSize += llvm::alignTo(Entry.size(), sizeof(uint64_t));
+  }
+  FileSize += SectionTwoSize;
+}
+
+static std::time_t getTime() {
+  std::time_t Now = time(nullptr);
+  if (Now < 0 || !isUInt<32>(Now))
+    return UINT32_MAX;
+  return Now;
+}
+
+Error WindowsResourceCOFFWriter::write() {
+  Current = Buffer->getBufferStart();
+
+  writeCOFFHeader();
+  writeFirstSectionHeader();
+  writeSecondSectionHeader();
+  writeFirstSection();
+  writeSecondSection();
+  writeSymbolTable();
+  writeStringTable();
+
+  if (auto EC = Buffer->commit()) {
+    return errorCodeToError(EC);
+  }
+
+  return Error::success();
+}
+
+void WindowsResourceCOFFWriter::writeCOFFHeader() {
+  // Write the COFF header.
+  auto *Header = reinterpret_cast<llvm::object::coff_file_header *>(Current);
+  switch (MachineType) {
+  case Machine::ARM:
+    Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_ARMNT;
+    break;
+  case Machine::X64:
+    Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_AMD64;
+    break;
+  case Machine::X86:
+    Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_I386;
+    break;
+  default:
+    Header->Machine = llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
+  }
+  Header->NumberOfSections = 2;
+  Header->TimeDateStamp = getTime();
+  Header->PointerToSymbolTable = SymbolTableOffset;
+  // One symbol per resource, 2 (symbol + aux) per section, 1 for @feat.00.
+  Header->NumberOfSymbols = Data.size() + 5;
+  Header->SizeOfOptionalHeader = 0;
+  Header->Characteristics = llvm::COFF::IMAGE_FILE_32BIT_MACHINE;
+}
+
+void WindowsResourceCOFFWriter::writeFirstSectionHeader() {
+  // Write the first section header.
+  Current += sizeof(llvm::object::coff_file_header);
+  auto *SectionOneHeader =
+      reinterpret_cast<llvm::object::coff_section *>(Current);
+  strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)llvm::COFF::NameSize);
+  SectionOneHeader->VirtualSize = 0;
+  SectionOneHeader->VirtualAddress = 0;
+  SectionOneHeader->SizeOfRawData = SectionOneSize;
+  SectionOneHeader->PointerToRawData = SectionOneOffset;
+  SectionOneHeader->PointerToRelocations = SectionOneRelocations;
+  SectionOneHeader->PointerToLinenumbers = 0;
+  SectionOneHeader->NumberOfRelocations = Data.size();
+  SectionOneHeader->NumberOfLinenumbers = 0;
+  SectionOneHeader->Characteristics = llvm::COFF::IMAGE_SCN_ALIGN_1BYTES;
+  SectionOneHeader->Characteristics +=
+      llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
+  SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE;
+  SectionOneHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ;
+}
+
+void WindowsResourceCOFFWriter::writeSecondSectionHeader() {
+  // Write the second section header.
+  Current += sizeof(llvm::object::coff_section);
+  auto *SectionTwoHeader =
+      reinterpret_cast<llvm::object::coff_section *>(Current);
+  strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)llvm::COFF::NameSize);
+  SectionTwoHeader->VirtualSize = 0;
+  SectionTwoHeader->VirtualAddress = 0;
+  SectionTwoHeader->SizeOfRawData = SectionTwoSize;
+  SectionTwoHeader->PointerToRawData = SectionTwoOffset;
+  SectionTwoHeader->PointerToRelocations = 0;
+  SectionTwoHeader->PointerToLinenumbers = 0;
+  SectionTwoHeader->NumberOfRelocations = 0;
+  SectionTwoHeader->NumberOfLinenumbers = 0;
+  SectionTwoHeader->Characteristics =
+      llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
+  SectionTwoHeader->Characteristics += llvm::COFF::IMAGE_SCN_MEM_READ;
+}
+
+void WindowsResourceCOFFWriter::writeFirstSection() {
+  // Write section one: directory tree, then string table, then relocations.
+  Current += sizeof(llvm::object::coff_section); // Skip one section header.
+
+  writeDirectoryTree();
+  writeDirectoryStringTable();
+  writeFirstSectionRelocations();
+}
+
+void WindowsResourceCOFFWriter::writeSecondSection() {
+  // Now write the .rsrc$02 section: every raw data entry, in Data order.
+  for (auto const &RawDataEntry : Data) {
+    std::copy(RawDataEntry.begin(), RawDataEntry.end(), Current);
+    Current += alignTo(RawDataEntry.size(), sizeof(uint64_t)); // 8-byte stride.
+  }
+}
+
+void WindowsResourceCOFFWriter::writeSymbolTable() {
+  // Now write the symbol table at Current, advancing past each record.
+  // First, the @feat.00 symbol.
+  auto *Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+  strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)llvm::COFF::NameSize);
+  Symbol->Value = 0x11;
+  Symbol->SectionNumber = 0xffff;
+  Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
+  Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+  Symbol->NumberOfAuxSymbols = 0;
+  Current += sizeof(llvm::object::coff_symbol16);
+
+  // Now write the .rsrc$01 symbol + aux.
+  Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+  strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)llvm::COFF::NameSize);
+  Symbol->Value = 0;
+  Symbol->SectionNumber = 1;
+  Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
+  Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+  Symbol->NumberOfAuxSymbols = 1;
+  Current += sizeof(llvm::object::coff_symbol16);
+  auto *Aux =
+      reinterpret_cast<llvm::object::coff_aux_section_definition *>(Current);
+  Aux->Length = SectionOneSize;
+  Aux->NumberOfRelocations = Data.size(); // One relocation per data entry.
+  Aux->NumberOfLinenumbers = 0;
+  Aux->CheckSum = 0;
+  Aux->NumberLowPart = 0;
+  Aux->Selection = 0;
+  Current += sizeof(llvm::object::coff_aux_section_definition);
+
+  // Now write the .rsrc$02 symbol + aux.
+  Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+  strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)llvm::COFF::NameSize);
+  Symbol->Value = 0;
+  Symbol->SectionNumber = 2;
+  Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
+  Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+  Symbol->NumberOfAuxSymbols = 1;
+  Current += sizeof(llvm::object::coff_symbol16);
+  Aux = reinterpret_cast<llvm::object::coff_aux_section_definition *>(Current);
+  Aux->Length = SectionTwoSize;
+  Aux->NumberOfRelocations = 0;
+  Aux->NumberOfLinenumbers = 0;
+  Aux->CheckSum = 0;
+  Aux->NumberLowPart = 0;
+  Aux->Selection = 0;
+  Current += sizeof(llvm::object::coff_aux_section_definition);
+
+  // Now write a symbol for each relocation, named "$R" + hex data offset.
+  for (unsigned i = 0; i < Data.size(); i++) {
+    char RelocationName[9]; // "$R" + 6 hex digits + NUL; wider values truncate.
+    snprintf(RelocationName, sizeof(RelocationName), "$R%06X", DataOffsets[i]);
+    Symbol = reinterpret_cast<llvm::object::coff_symbol16 *>(Current);
+    strncpy(Symbol->Name.ShortName, RelocationName,
+            (size_t)llvm::COFF::NameSize);
+    Symbol->Value = DataOffsets[i];
+    Symbol->SectionNumber = 1;
+    Symbol->Type = llvm::COFF::IMAGE_SYM_DTYPE_NULL;
+    Symbol->StorageClass = llvm::COFF::IMAGE_SYM_CLASS_STATIC;
+    Symbol->NumberOfAuxSymbols = 0;
+    Current += sizeof(llvm::object::coff_symbol16);
+  }
+}
+
+void WindowsResourceCOFFWriter::writeStringTable() {
+  // Just 4 null bytes for the string table; Current is not advanced afterwards.
+  auto COFFStringTable = reinterpret_cast<uint32_t *>(Current);
+  *COFFStringTable = 0;
+}
+
+void WindowsResourceCOFFWriter::writeDirectoryTree() {
+  // Traverse parsed resource tree breadth-first and write the corresponding
+  // COFF objects.
+  std::queue<const WindowsResourceParser::TreeNode *> Queue;
+  Queue.push(&Resources);
+  uint32_t NextLevelOffset = sizeof(llvm::object::coff_resource_dir_table) +
+                             (Resources.getStringChildren().size() +
+                              Resources.getIDChildren().size()) *
+                                 sizeof(llvm::object::coff_resource_dir_entry);
+  std::vector<const WindowsResourceParser::TreeNode *> DataEntriesTreeOrder;
+  uint32_t CurrentRelativeOffset = 0; // Bytes written since this call began.
+
+  while (!Queue.empty()) {
+    auto CurrentNode = Queue.front();
+    Queue.pop();
+    auto *Table =
+        reinterpret_cast<llvm::object::coff_resource_dir_table *>(Current);
+    Table->Characteristics = CurrentNode->getCharacteristics();
+    Table->TimeDateStamp = 0;
+    Table->MajorVersion = CurrentNode->getMajorVersion();
+    Table->MinorVersion = CurrentNode->getMinorVersion();
+    auto &IDChildren = CurrentNode->getIDChildren();
+    auto &StringChildren = CurrentNode->getStringChildren();
+    Table->NumberOfNameEntries = StringChildren.size();
+    Table->NumberOfIDEntries = IDChildren.size();
+    Current += sizeof(llvm::object::coff_resource_dir_table);
+    CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_table);
+
+    // Write the directory entries immediately following each directory table.
+    for (auto const &Child : StringChildren) {
+      auto *Entry =
+          reinterpret_cast<llvm::object::coff_resource_dir_entry *>(Current);
+      Entry->Identifier.NameOffset =
+          StringTableOffsets[Child.second->getStringIndex()];
+      if (Child.second->checkIsDataNode()) {
+        Entry->Offset.DataEntryOffset = NextLevelOffset;
+        NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry);
+        DataEntriesTreeOrder.push_back(Child.second.get());
+      } else {
+        Entry->Offset.SubdirOffset = NextLevelOffset + (1u << 31); // High bit set.
+        NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) +
+                           (Child.second->getStringChildren().size() +
+                            Child.second->getIDChildren().size()) *
+                               sizeof(llvm::object::coff_resource_dir_entry);
+        Queue.push(Child.second.get());
+      }
+      Current += sizeof(llvm::object::coff_resource_dir_entry);
+      CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry);
+    }
+    for (auto const &Child : IDChildren) {
+      auto *Entry =
+          reinterpret_cast<llvm::object::coff_resource_dir_entry *>(Current);
+      Entry->Identifier.ID = Child.first;
+      if (Child.second->checkIsDataNode()) {
+        Entry->Offset.DataEntryOffset = NextLevelOffset;
+        NextLevelOffset += sizeof(llvm::object::coff_resource_data_entry);
+        DataEntriesTreeOrder.push_back(Child.second.get());
+      } else {
+        Entry->Offset.SubdirOffset = NextLevelOffset + (1u << 31); // High bit set.
+        NextLevelOffset += sizeof(llvm::object::coff_resource_dir_table) +
+                           (Child.second->getStringChildren().size() +
+                            Child.second->getIDChildren().size()) *
+                               sizeof(llvm::object::coff_resource_dir_entry);
+        Queue.push(Child.second.get());
+      }
+      Current += sizeof(llvm::object::coff_resource_dir_entry);
+      CurrentRelativeOffset += sizeof(llvm::object::coff_resource_dir_entry);
+    }
+  }
+
+  RelocationAddresses.resize(Data.size());
+  // Now write all the resource data entries, in the order they were queued.
+  for (auto DataNodes : DataEntriesTreeOrder) {
+    auto *Entry =
+        reinterpret_cast<llvm::object::coff_resource_data_entry *>(Current);
+    RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset;
+    Entry->DataRVA = 0; // Set to zero because it is a relocation.
+    Entry->DataSize = Data[DataNodes->getDataIndex()].size();
+    Entry->Codepage = 0;
+    Entry->Reserved = 0;
+    Current += sizeof(llvm::object::coff_resource_data_entry);
+    CurrentRelativeOffset += sizeof(llvm::object::coff_resource_data_entry);
+  }
+}
+
+void WindowsResourceCOFFWriter::writeDirectoryStringTable() {
+  // Now write the directory string table for .rsrc$01
+  uint32_t TotalStringTableSize = 0;
+  for (const auto &String : StringTable) { // By reference: no per-string copy.
+    auto *LengthField = reinterpret_cast<uint16_t *>(Current);
+    uint16_t Length = String.size();
+    *LengthField = Length; // Each string is prefixed by its 16-bit length.
+    Current += sizeof(uint16_t);
+    auto *Start = reinterpret_cast<UTF16 *>(Current);
+    std::copy(String.begin(), String.end(), Start);
+    Current += Length * sizeof(UTF16);
+    TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t);
+  }
+  Current += // Skip padding so the table ends on a 4-byte multiple of its size.
+      alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize;
+}
+
+void WindowsResourceCOFFWriter::writeFirstSectionRelocations() {
+
+  // Now write the relocations for .rsrc$01, one per entry in Data.
+  // Five symbols already in table before we start, @feat.00 and 2 for each
+  // .rsrc section (symbol + aux), so relocation symbols start at index 5.
+  uint32_t NextSymbolIndex = 5;
+  for (unsigned i = 0; i < Data.size(); i++) {
+    auto *Reloc = reinterpret_cast<llvm::object::coff_relocation *>(Current);
+    Reloc->VirtualAddress = RelocationAddresses[i]; // Set by writeDirectoryTree.
+    Reloc->SymbolTableIndex = NextSymbolIndex++;
+    switch (MachineType) {
+    case Machine::ARM:
+      Reloc->Type = llvm::COFF::IMAGE_REL_ARM_ADDR32NB;
+      break;
+    case Machine::X64:
+      Reloc->Type = llvm::COFF::IMAGE_REL_AMD64_ADDR32NB;
+      break;
+    case Machine::X86:
+      Reloc->Type = llvm::COFF::IMAGE_REL_I386_DIR32NB;
+      break;
+    default:
+      Reloc->Type = 0; // Any other machine type gets relocation type 0.
+    }
+    Current += sizeof(llvm::object::coff_relocation);
+  }
+}
+
+Error writeWindowsResourceCOFF(StringRef OutputFile, Machine MachineType,
+                               const WindowsResourceParser &Parser) {
+  Error E = Error::success(); // Handed to the writer's constructor below.
+  WindowsResourceCOFFWriter Writer(OutputFile, MachineType, Parser, E);
+  if (E) // The constructor reported a failure; return it without writing.
+    return E;
+  return Writer.write();
+}
+
 } // namespace object
 } // namespace llvm
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index 21d29835624e..08a4bb715fac 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -18,13 +18,20 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/DebugInfo/CodeView/CodeViewError.h"
 #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
+#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
 #include "llvm/DebugInfo/CodeView/EnumTables.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-
+#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h"
+#include "llvm/Support/BinaryStreamWriter.h"
 using namespace llvm;
 using namespace llvm::codeview;
 using namespace llvm::CodeViewYAML;
@@ -38,13 +45,21 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineBlock)
 LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeSite)
 LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(CrossModuleExport)
+LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLCrossModuleImport)
 LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
+LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLFrameData)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
 
 LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false)
 LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind)
 LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind)
 LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags)
 
+LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleExport)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLFrameData)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLCrossModuleImport)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleImportItem)
 LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineEntry)
 LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceColumnEntry)
 LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceFileChecksumEntry)
@@ -61,7 +76,8 @@ struct YAMLSubsectionBase {
 
   virtual void map(IO &IO) = 0;
   virtual std::unique_ptr<DebugSubsection>
-  toCodeViewSubsection(DebugStringTableSubsection *UseStrings,
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *UseStrings,
                        DebugChecksumsSubsection *UseChecksums) const = 0;
 };
 }
@@ -75,7 +91,8 @@ struct YAMLChecksumsSubsection : public YAMLSubsectionBase {
 
   void map(IO &IO) override;
   std::unique_ptr<DebugSubsection>
-  toCodeViewSubsection(DebugStringTableSubsection *Strings,
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
                        DebugChecksumsSubsection *Checksums) const override;
   static Expected<std::shared_ptr<YAMLChecksumsSubsection>>
   fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
@@ -89,7 +106,8 @@ struct YAMLLinesSubsection : public YAMLSubsectionBase {
 
   void map(IO &IO) override;
   std::unique_ptr<DebugSubsection>
-  toCodeViewSubsection(DebugStringTableSubsection *Strings,
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
                        DebugChecksumsSubsection *Checksums) const override;
   static Expected<std::shared_ptr<YAMLLinesSubsection>>
   fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
@@ -105,7 +123,8 @@ struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase {
 
   void map(IO &IO) override;
   std::unique_ptr<DebugSubsection>
-  toCodeViewSubsection(DebugStringTableSubsection *Strings,
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
                        DebugChecksumsSubsection *Checksums) const override;
   static Expected<std::shared_ptr<YAMLInlineeLinesSubsection>>
   fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
@@ -114,6 +133,97 @@ struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase {
 
   InlineeInfo InlineeLines;
 };
+
+struct YAMLCrossModuleExportsSubsection : public YAMLSubsectionBase {
+  YAMLCrossModuleExportsSubsection()
+      : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeExports) {}
+
+  void map(IO &IO) override; // Tag "!CrossModuleExports", key "Exports".
+  std::unique_ptr<DebugSubsection>
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
+                       DebugChecksumsSubsection *Checksums) const override;
+  static Expected<std::shared_ptr<YAMLCrossModuleExportsSubsection>>
+  fromCodeViewSubsection(const DebugCrossModuleExportsSubsectionRef &Exports);
+
+  std::vector<CrossModuleExport> Exports; // Local/global id pairs.
+};
+
+struct YAMLCrossModuleImportsSubsection : public YAMLSubsectionBase {
+  YAMLCrossModuleImportsSubsection()
+      : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeImports) {}
+
+  void map(IO &IO) override; // Tag "!CrossModuleImports", key "Imports".
+  std::unique_ptr<DebugSubsection>
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
+                       DebugChecksumsSubsection *Checksums) const override;
+  static Expected<std::shared_ptr<YAMLCrossModuleImportsSubsection>>
+  fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+                         const DebugCrossModuleImportsSubsectionRef &Imports);
+
+  std::vector<YAMLCrossModuleImport> Imports; // Per module: name + import ids.
+};
+
+struct YAMLSymbolsSubsection : public YAMLSubsectionBase {
+  YAMLSymbolsSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Symbols) {}
+
+  void map(IO &IO) override; // Tag "!Symbols", key "Records".
+  std::unique_ptr<DebugSubsection>
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
+                       DebugChecksumsSubsection *Checksums) const override;
+  static Expected<std::shared_ptr<YAMLSymbolsSubsection>>
+  fromCodeViewSubsection(const DebugSymbolsSubsectionRef &Symbols);
+
+  std::vector<CodeViewYAML::SymbolRecord> Symbols; // YAML form of each record.
+};
+
+struct YAMLStringTableSubsection : public YAMLSubsectionBase {
+  YAMLStringTableSubsection()
+      : YAMLSubsectionBase(DebugSubsectionKind::StringTable) {}
+
+  void map(IO &IO) override; // Tag "!StringTable", key "Strings".
+  std::unique_ptr<DebugSubsection>
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
+                       DebugChecksumsSubsection *Checksums) const override;
+  static Expected<std::shared_ptr<YAMLStringTableSubsection>>
+  fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings);
+
+  std::vector<StringRef> Strings; // Non-owning views of the table's strings.
+};
+
+struct YAMLFrameDataSubsection : public YAMLSubsectionBase {
+  YAMLFrameDataSubsection()
+      : YAMLSubsectionBase(DebugSubsectionKind::FrameData) {}
+
+  void map(IO &IO) override; // Tag "!FrameData", key "Frames".
+  std::unique_ptr<DebugSubsection>
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
+                       DebugChecksumsSubsection *Checksums) const override;
+  static Expected<std::shared_ptr<YAMLFrameDataSubsection>>
+  fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+                         const DebugFrameDataSubsectionRef &Frames);
+
+  std::vector<YAMLFrameData> Frames; // FrameFunc is held as a string here.
+};
+
+struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase {
+  YAMLCoffSymbolRVASubsection()
+      : YAMLSubsectionBase(DebugSubsectionKind::CoffSymbolRVA) {}
+
+  void map(IO &IO) override; // Tag "!COFFSymbolRVAs", key "RVAs".
+  std::unique_ptr<DebugSubsection>
+  toCodeViewSubsection(BumpPtrAllocator &Allocator,
+                       DebugStringTableSubsection *Strings,
+                       DebugChecksumsSubsection *Checksums) const override;
+  static Expected<std::shared_ptr<YAMLCoffSymbolRVASubsection>>
+  fromCodeViewSubsection(const DebugSymbolRVASubsectionRef &RVAs);
+
+  std::vector<uint32_t> RVAs; // Emitted as a YAML flow sequence of integers.
+};
 }
 
 void ScalarBitSetTraits<LineFlags>::bitset(IO &io, LineFlags &Flags) {
@@ -161,6 +271,17 @@ void MappingTraits<SourceLineBlock>::mapping(IO &IO, SourceLineBlock &Obj) {
   IO.mapRequired("Columns", Obj.Columns);
 }
 
+void MappingTraits<CrossModuleExport>::mapping(IO &IO, CrossModuleExport &Obj) {
+  IO.mapRequired("LocalId", Obj.Local);   // Both keys are mandatory.
+  IO.mapRequired("GlobalId", Obj.Global);
+}
+
+void MappingTraits<YAMLCrossModuleImport>::mapping(IO &IO,
+                                                   YAMLCrossModuleImport &Obj) {
+  IO.mapRequired("Module", Obj.ModuleName); // Both keys are mandatory.
+  IO.mapRequired("Imports", Obj.ImportIds); // Ids imported from that module.
+}
+
 void MappingTraits<SourceFileChecksumEntry>::mapping(
     IO &IO, SourceFileChecksumEntry &Obj) {
   IO.mapRequired("FileName", Obj.FileName);
@@ -175,6 +296,17 @@ void MappingTraits<InlineeSite>::mapping(IO &IO, InlineeSite &Obj) {
   IO.mapOptional("ExtraFiles", Obj.ExtraFiles);
 }
 
+void MappingTraits<YAMLFrameData>::mapping(IO &IO, YAMLFrameData &Obj) {
+  IO.mapRequired("CodeSize", Obj.CodeSize); // First three keys are mandatory.
+  IO.mapRequired("FrameFunc", Obj.FrameFunc);
+  IO.mapRequired("LocalSize", Obj.LocalSize);
+  IO.mapOptional("MaxStackSize", Obj.MaxStackSize); // The rest may be omitted.
+  IO.mapOptional("ParamsSize", Obj.ParamsSize);
+  IO.mapOptional("PrologSize", Obj.PrologSize);
+  IO.mapOptional("RvaStart", Obj.RvaStart);
+  IO.mapOptional("SavedRegsSize", Obj.SavedRegsSize);
+} // NOTE(review): Obj.Flags has no key here but is copied on conversion; confirm.
+
 void YAMLChecksumsSubsection::map(IO &IO) {
   IO.mapTag("!FileChecksums", true);
   IO.mapRequired("Checksums", Checksums);
@@ -196,6 +328,36 @@ void YAMLInlineeLinesSubsection::map(IO &IO) {
   IO.mapRequired("Sites", InlineeLines.Sites);
 }
 
+void YAMLCrossModuleExportsSubsection::map(IO &IO) {
+  IO.mapTag("!CrossModuleExports", true); // Tag identifying this subsection.
+  IO.mapOptional("Exports", Exports);     // The key may be absent.
+}
+
+void YAMLCrossModuleImportsSubsection::map(IO &IO) {
+  IO.mapTag("!CrossModuleImports", true); // Tag identifying this subsection.
+  IO.mapOptional("Imports", Imports);     // The key may be absent.
+}
+
+void YAMLSymbolsSubsection::map(IO &IO) {
+  IO.mapTag("!Symbols", true);      // Tag identifying this subsection.
+  IO.mapRequired("Records", Symbols); // Mandatory list of symbol records.
+}
+
+void YAMLStringTableSubsection::map(IO &IO) {
+  IO.mapTag("!StringTable", true);    // Tag identifying this subsection.
+  IO.mapRequired("Strings", Strings); // Mandatory list of strings.
+}
+
+void YAMLFrameDataSubsection::map(IO &IO) {
+  IO.mapTag("!FrameData", true);    // Tag identifying this subsection.
+  IO.mapRequired("Frames", Frames); // Mandatory list of frame records.
+}
+
+void YAMLCoffSymbolRVASubsection::map(IO &IO) {
+  IO.mapTag("!COFFSymbolRVAs", true); // Tag identifying this subsection.
+  IO.mapRequired("RVAs", RVAs);       // Mandatory list of RVAs.
+}
+
 void MappingTraits<YAMLDebugSubsection>::mapping(
     IO &IO, YAMLDebugSubsection &Subsection) {
   if (!IO.outputting()) {
@@ -206,6 +368,20 @@ void MappingTraits<YAMLDebugSubsection>::mapping(
       Subsection.Subsection = std::make_shared<YAMLLinesSubsection>();
     } else if (IO.mapTag("!InlineeLines")) {
       Subsection.Subsection = std::make_shared<YAMLInlineeLinesSubsection>();
+    } else if (IO.mapTag("!CrossModuleExports")) {
+      Subsection.Subsection =
+          std::make_shared<YAMLCrossModuleExportsSubsection>();
+    } else if (IO.mapTag("!CrossModuleImports")) {
+      Subsection.Subsection =
+          std::make_shared<YAMLCrossModuleImportsSubsection>();
+    } else if (IO.mapTag("!Symbols")) {
+      Subsection.Subsection = std::make_shared<YAMLSymbolsSubsection>();
+    } else if (IO.mapTag("!StringTable")) {
+      Subsection.Subsection = std::make_shared<YAMLStringTableSubsection>();
+    } else if (IO.mapTag("!FrameData")) {
+      Subsection.Subsection = std::make_shared<YAMLFrameDataSubsection>();
+    } else if (IO.mapTag("!COFFSymbolRVAs")) {
+      Subsection.Subsection = std::make_shared<YAMLCoffSymbolRVASubsection>();
     } else {
       llvm_unreachable("Unexpected subsection tag!");
     }
@@ -213,18 +389,19 @@ void MappingTraits<YAMLDebugSubsection>::mapping(
   Subsection.Subsection->map(IO);
 }
 
-static Expected<const YAMLChecksumsSubsection &>
+static std::shared_ptr<YAMLChecksumsSubsection>
 findChecksums(ArrayRef<YAMLDebugSubsection> Subsections) {
   for (const auto &SS : Subsections) {
     if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) {
-      return static_cast<const YAMLChecksumsSubsection &>(*SS.Subsection);
+      return std::static_pointer_cast<YAMLChecksumsSubsection>(SS.Subsection);
     }
   }
-  return make_error<CodeViewError>(cv_error_code::no_records);
+
+  return nullptr;
 }
 
 std::unique_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection(
-    DebugStringTableSubsection *UseStrings,
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings,
     DebugChecksumsSubsection *UseChecksums) const {
   assert(UseStrings && !UseChecksums);
   auto Result = llvm::make_unique<DebugChecksumsSubsection>(*UseStrings);
@@ -235,7 +412,7 @@ std::unique_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection(
 }
 
 std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
-    DebugStringTableSubsection *UseStrings,
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings,
     DebugChecksumsSubsection *UseChecksums) const {
   assert(UseStrings && UseChecksums);
   auto Result =
@@ -266,7 +443,7 @@ std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
 
 std::unique_ptr<DebugSubsection>
 YAMLInlineeLinesSubsection::toCodeViewSubsection(
-    DebugStringTableSubsection *UseStrings,
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *UseStrings,
     DebugChecksumsSubsection *UseChecksums) const {
   assert(UseChecksums);
   auto Result = llvm::make_unique<DebugInlineeLinesSubsection>(
@@ -285,6 +462,79 @@ YAMLInlineeLinesSubsection::toCodeViewSubsection(
   return llvm::cast<DebugSubsection>(std::move(Result));
 }
 
+std::unique_ptr<DebugSubsection>
+YAMLCrossModuleExportsSubsection::toCodeViewSubsection(
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
+    DebugChecksumsSubsection *Checksums) const {
+  auto Result = llvm::make_unique<DebugCrossModuleExportsSubsection>();
+  for (const auto &M : Exports) // Allocator/Strings/Checksums are unused here.
+    Result->addMapping(M.Local, M.Global);
+  return llvm::cast<DebugSubsection>(std::move(Result));
+}
+
+std::unique_ptr<DebugSubsection>
+YAMLCrossModuleImportsSubsection::toCodeViewSubsection(
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
+    DebugChecksumsSubsection *Checksums) const {
+  assert(Strings && "Cross-module imports need a string table!");
+  auto Result = llvm::make_unique<DebugCrossModuleImportsSubsection>(*Strings);
+  for (const auto &M : Imports) // One addImport call per (module, id) pair.
+    for (const auto Id : M.ImportIds)
+      Result->addImport(M.ModuleName, Id);
+  return llvm::cast<DebugSubsection>(std::move(Result));
+}
+
+std::unique_ptr<DebugSubsection> YAMLSymbolsSubsection::toCodeViewSubsection(
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
+    DebugChecksumsSubsection *Checksums) const {
+  auto Result = llvm::make_unique<DebugSymbolsSubsection>();
+  for (const auto &Sym : Symbols) // Strings and Checksums are unused here.
+    Result->addSymbol(
+        Sym.toCodeViewSymbol(Allocator, CodeViewContainer::ObjectFile));
+  return std::move(Result);
+}
+
+std::unique_ptr<DebugSubsection>
+YAMLStringTableSubsection::toCodeViewSubsection(
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
+    DebugChecksumsSubsection *Checksums) const {
+  auto Result = llvm::make_unique<DebugStringTableSubsection>(); // Fresh table.
+  for (const auto &Str : this->Strings) // this-> : the parameter shadows it.
+    Result->insert(Str);
+  return std::move(Result);
+}
+
+std::unique_ptr<DebugSubsection> YAMLFrameDataSubsection::toCodeViewSubsection(
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
+    DebugChecksumsSubsection *Checksums) const {
+  assert(Strings); // Required: FrameFunc is stored via Strings->insert below.
+  auto Result = llvm::make_unique<DebugFrameDataSubsection>();
+  for (const auto &YF : Frames) {
+    codeview::FrameData F;
+    F.CodeSize = YF.CodeSize;
+    F.Flags = YF.Flags; // NOTE(review): Flags has no YAML key; confirm its value.
+    F.LocalSize = YF.LocalSize;
+    F.MaxStackSize = YF.MaxStackSize;
+    F.ParamsSize = YF.ParamsSize;
+    F.PrologSize = YF.PrologSize;
+    F.RvaStart = YF.RvaStart;
+    F.SavedRegsSize = YF.SavedRegsSize;
+    F.FrameFunc = Strings->insert(YF.FrameFunc); // Store insert()'s result.
+    Result->addFrameData(F);
+  }
+  return std::move(Result);
+}
+
+std::unique_ptr<DebugSubsection>
+YAMLCoffSymbolRVASubsection::toCodeViewSubsection(
+    BumpPtrAllocator &Allocator, DebugStringTableSubsection *Strings,
+    DebugChecksumsSubsection *Checksums) const {
+  auto Result = llvm::make_unique<DebugSymbolRVASubsection>();
+  for (const auto &RVA : RVAs) // Allocator/Strings/Checksums are unused here.
+    Result->addRVA(RVA);
+  return std::move(Result);
+}
+
 static Expected<SourceFileChecksumEntry>
 convertOneChecksum(const DebugStringTableSubsectionRef &Strings,
                    const FileChecksumEntry &CS) {
@@ -391,20 +641,121 @@ YAMLInlineeLinesSubsection::fromCodeViewSubsection(
   return Result;
 }
 
+Expected<std::shared_ptr<YAMLCrossModuleExportsSubsection>>
+YAMLCrossModuleExportsSubsection::fromCodeViewSubsection(
+    const DebugCrossModuleExportsSubsectionRef &Exports) {
+  auto Result = std::make_shared<YAMLCrossModuleExportsSubsection>();
+  Result->Exports.assign(Exports.begin(), Exports.end()); // Copy every export.
+  return Result; // No error path in this conversion.
+}
+
+Expected<std::shared_ptr<YAMLCrossModuleImportsSubsection>>
+YAMLCrossModuleImportsSubsection::fromCodeViewSubsection(
+    const DebugStringTableSubsectionRef &Strings,
+    const DebugCrossModuleImportsSubsectionRef &Imports) {
+  auto Result = std::make_shared<YAMLCrossModuleImportsSubsection>();
+  for (const auto &CMI : Imports) {
+    YAMLCrossModuleImport YCMI;
+    auto ExpectedStr = Strings.getString(CMI.Header->ModuleNameOffset);
+    if (!ExpectedStr) // Module name lookup failed; propagate the error.
+      return ExpectedStr.takeError();
+    YCMI.ModuleName = *ExpectedStr;
+    YCMI.ImportIds.assign(CMI.Imports.begin(), CMI.Imports.end());
+    Result->Imports.push_back(YCMI);
+  }
+  return Result;
+}
+
+Expected<std::shared_ptr<YAMLSymbolsSubsection>>
+YAMLSymbolsSubsection::fromCodeViewSubsection(
+    const DebugSymbolsSubsectionRef &Symbols) {
+  auto Result = std::make_shared<YAMLSymbolsSubsection>();
+  for (const auto &Sym : Symbols) {
+    auto S = CodeViewYAML::SymbolRecord::fromCodeViewSymbol(Sym);
+    if (!S) // Stop at the first bad record; join context with the cause.
+      return joinErrors(make_error<CodeViewError>(
+                            cv_error_code::corrupt_record,
+                            "Invalid CodeView Symbol Record in SymbolRecord "
+                            "subsection of .debug$S while converting to YAML!"),
+                        S.takeError());
+
+    Result->Symbols.push_back(*S);
+  }
+  return Result;
+}
+
+Expected<std::shared_ptr<YAMLStringTableSubsection>>
+YAMLStringTableSubsection::fromCodeViewSubsection(
+    const DebugStringTableSubsectionRef &Strings) {
+  auto Result = std::make_shared<YAMLStringTableSubsection>();
+  BinaryStreamReader Reader(Strings.getBuffer());
+  StringRef S;
+  // First item is a single null string, skip it.
+  if (auto EC = Reader.readCString(S))
+    return std::move(EC);
+  assert(S.empty()); // Debug-only check that the first string was empty.
+  while (Reader.bytesRemaining() > 0) { // Read strings until the buffer ends.
+    if (auto EC = Reader.readCString(S))
+      return std::move(EC);
+    Result->Strings.push_back(S);
+  }
+  return Result;
+}
+
+Expected<std::shared_ptr<YAMLFrameDataSubsection>>
+YAMLFrameDataSubsection::fromCodeViewSubsection(
+    const DebugStringTableSubsectionRef &Strings,
+    const DebugFrameDataSubsectionRef &Frames) {
+  auto Result = std::make_shared<YAMLFrameDataSubsection>();
+  for (const auto &F : Frames) {
+    YAMLFrameData YF;
+    YF.CodeSize = F.CodeSize; // Field-by-field copy of the numeric members.
+    YF.Flags = F.Flags;
+    YF.LocalSize = F.LocalSize;
+    YF.MaxStackSize = F.MaxStackSize;
+    YF.ParamsSize = F.ParamsSize;
+    YF.PrologSize = F.PrologSize;
+    YF.RvaStart = F.RvaStart;
+    YF.SavedRegsSize = F.SavedRegsSize;
+
+    auto ES = Strings.getString(F.FrameFunc); // Resolve the id to its string.
+    if (!ES)
+      return joinErrors(
+          make_error<CodeViewError>(
+              cv_error_code::no_records,
+              "Could not find string for string id while mapping FrameData!"),
+          ES.takeError());
+    YF.FrameFunc = *ES;
+    Result->Frames.push_back(YF);
+  }
+  return Result;
+}
+
+Expected<std::shared_ptr<YAMLCoffSymbolRVASubsection>>
+YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(
+    const DebugSymbolRVASubsectionRef &Section) {
+  auto Result = std::make_shared<YAMLCoffSymbolRVASubsection>();
+  for (const auto &RVA : Section) { // Copy every RVA in iteration order.
+    Result->RVAs.push_back(RVA);
+  }
+  return Result; // No error path in this conversion.
+}
+
 Expected<std::vector<std::unique_ptr<DebugSubsection>>>
-llvm::CodeViewYAML::convertSubsectionList(
-    ArrayRef<YAMLDebugSubsection> Subsections,
+llvm::CodeViewYAML::toCodeViewSubsectionList(
+    BumpPtrAllocator &Allocator, ArrayRef<YAMLDebugSubsection> Subsections,
     DebugStringTableSubsection &Strings) {
   std::vector<std::unique_ptr<DebugSubsection>> Result;
   if (Subsections.empty())
     return std::move(Result);
 
   auto Checksums = findChecksums(Subsections);
-  if (!Checksums)
-    return Checksums.takeError();
-  auto ChecksumsBase = Checksums->toCodeViewSubsection(&Strings, nullptr);
-  DebugChecksumsSubsection &CS =
-      llvm::cast<DebugChecksumsSubsection>(*ChecksumsBase);
+  std::unique_ptr<DebugSubsection> ChecksumsBase;
+  if (Checksums)
+    ChecksumsBase =
+        Checksums->toCodeViewSubsection(Allocator, &Strings, nullptr);
+  DebugChecksumsSubsection *CS =
+      static_cast<DebugChecksumsSubsection *>(ChecksumsBase.get());
   for (const auto &SS : Subsections) {
     // We've already converted the checksums subsection, don't do it
     // twice.
@@ -412,7 +763,42 @@ llvm::CodeViewYAML::convertSubsectionList(
     if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums)
       CVS = std::move(ChecksumsBase);
     else
-      CVS = SS.Subsection->toCodeViewSubsection(&Strings, &CS);
+      CVS = SS.Subsection->toCodeViewSubsection(Allocator, &Strings, CS);
+    assert(CVS != nullptr);
+    Result.push_back(std::move(CVS));
+  }
+  return std::move(Result);
+}
+
+Expected<std::vector<std::unique_ptr<codeview::DebugSubsection>>>
+llvm::CodeViewYAML::toCodeViewSubsectionList(
+    BumpPtrAllocator &Allocator, ArrayRef<YAMLDebugSubsection> Subsections,
+    std::unique_ptr<DebugStringTableSubsection> &TakeStrings,
+    DebugStringTableSubsection *StringsRef) {
+  std::vector<std::unique_ptr<DebugSubsection>> Result;
+  if (Subsections.empty())
+    return std::move(Result);
+
+  auto Checksums = findChecksums(Subsections); // Null when there is none.
+
+  std::unique_ptr<DebugSubsection> ChecksumsBase;
+  if (Checksums) // Convert checksums first; other subsections receive CS.
+    ChecksumsBase =
+        Checksums->toCodeViewSubsection(Allocator, StringsRef, nullptr);
+  DebugChecksumsSubsection *CS =
+      static_cast<DebugChecksumsSubsection *>(ChecksumsBase.get());
+  for (const auto &SS : Subsections) {
+    // We've already converted the checksums and string table subsection, don't
+    // do it twice.
+    std::unique_ptr<DebugSubsection> CVS;
+    if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums)
+      CVS = std::move(ChecksumsBase);
+    else if (SS.Subsection->Kind == DebugSubsectionKind::StringTable) {
+      assert(TakeStrings && "No string table!");
+      CVS = std::move(TakeStrings); // Ownership moves out of the caller's ptr.
+    } else
+      CVS = SS.Subsection->toCodeViewSubsection(Allocator, StringsRef, CS);
+    assert(CVS != nullptr);
     Result.push_back(std::move(CVS));
   }
   return std::move(Result);
@@ -420,21 +806,29 @@ llvm::CodeViewYAML::convertSubsectionList(
 
 namespace {
 struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
-  explicit SubsectionConversionVisitor(
-      const DebugStringTableSubsectionRef &Strings,
-      const DebugChecksumsSubsectionRef &Checksums)
-      : Strings(Strings), Checksums(Checksums) {}
+  SubsectionConversionVisitor() {}
 
   Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override;
-  Error visitLines(DebugLinesSubsectionRef &Lines) override;
-  Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) override;
-  Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) override;
+  Error visitLines(DebugLinesSubsectionRef &Lines,
+                   const DebugSubsectionState &State) override;
+  Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums,
+                           const DebugSubsectionState &State) override;
+  Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees,
+                          const DebugSubsectionState &State) override;
+  Error visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &Exports,
+                                const DebugSubsectionState &State) override;
+  Error visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &Imports,
+                                const DebugSubsectionState &State) override;
+  Error visitStringTable(DebugStringTableSubsectionRef &Strings,
+                         const DebugSubsectionState &State) override;
+  Error visitSymbols(DebugSymbolsSubsectionRef &Symbols,
+                     const DebugSubsectionState &State) override;
+  Error visitFrameData(DebugFrameDataSubsectionRef &Frames,
+                       const DebugSubsectionState &State) override;
+  Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &RVAs,
+                            const DebugSubsectionState &State) override;
 
   YAMLDebugSubsection Subsection;
-
-private:
-  const DebugStringTableSubsectionRef &Strings;
-  const DebugChecksumsSubsectionRef &Checksums;
 };
 
 Error SubsectionConversionVisitor::visitUnknown(
@@ -442,9 +836,10 @@ Error SubsectionConversionVisitor::visitUnknown(
   return make_error<CodeViewError>(cv_error_code::operation_unsupported);
 }
 
-Error SubsectionConversionVisitor::visitLines(DebugLinesSubsectionRef &Lines) {
-  auto Result =
-      YAMLLinesSubsection::fromCodeViewSubsection(Strings, Checksums, Lines);
+Error SubsectionConversionVisitor::visitLines(
+    DebugLinesSubsectionRef &Lines, const DebugSubsectionState &State) {
+  auto Result = YAMLLinesSubsection::fromCodeViewSubsection(
+      State.strings(), State.checksums(), Lines);
   if (!Result)
     return Result.takeError();
   Subsection.Subsection = *Result;
@@ -452,9 +847,9 @@ Error SubsectionConversionVisitor::visitLines(DebugLinesSubsectionRef &Lines) {
 }
 
 Error SubsectionConversionVisitor::visitFileChecksums(
-    DebugChecksumsSubsectionRef &Checksums) {
-  auto Result =
-      YAMLChecksumsSubsection::fromCodeViewSubsection(Strings, Checksums);
+    DebugChecksumsSubsectionRef &Checksums, const DebugSubsectionState &State) {
+  auto Result = YAMLChecksumsSubsection::fromCodeViewSubsection(State.strings(),
+                                                                Checksums);
   if (!Result)
     return Result.takeError();
   Subsection.Subsection = *Result;
@@ -462,9 +857,69 @@ Error SubsectionConversionVisitor::visitFileChecksums(
 }
 
 Error SubsectionConversionVisitor::visitInlineeLines(
-    DebugInlineeLinesSubsectionRef &Inlinees) {
+    DebugInlineeLinesSubsectionRef &Inlinees,
+    const DebugSubsectionState &State) {
   auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection(
-      Strings, Checksums, Inlinees);
+      State.strings(), State.checksums(), Inlinees);
+  if (!Result)
+    return Result.takeError();
+  Subsection.Subsection = *Result;
+  return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitCrossModuleExports(
+    DebugCrossModuleExportsSubsectionRef &Exports,
+    const DebugSubsectionState &State) {
+  auto Result =
+      YAMLCrossModuleExportsSubsection::fromCodeViewSubsection(Exports);
+  if (!Result)
+    return Result.takeError();
+  Subsection.Subsection = *Result;
+  return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitCrossModuleImports(
+    DebugCrossModuleImportsSubsectionRef &Imports,
+    const DebugSubsectionState &State) {
+  auto Result = YAMLCrossModuleImportsSubsection::fromCodeViewSubsection(
+      State.strings(), Imports);
+  if (!Result)
+    return Result.takeError();
+  Subsection.Subsection = *Result;
+  return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitStringTable(
+    DebugStringTableSubsectionRef &Strings, const DebugSubsectionState &State) {
+  auto Result = YAMLStringTableSubsection::fromCodeViewSubsection(Strings);
+  if (!Result)
+    return Result.takeError();
+  Subsection.Subsection = *Result;
+  return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitSymbols(
+    DebugSymbolsSubsectionRef &Symbols, const DebugSubsectionState &State) {
+  auto Result = YAMLSymbolsSubsection::fromCodeViewSubsection(Symbols);
+  if (!Result)
+    return Result.takeError();
+  Subsection.Subsection = *Result;
+  return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitFrameData(
+    DebugFrameDataSubsectionRef &Frames, const DebugSubsectionState &State) {
+  auto Result =
+      YAMLFrameDataSubsection::fromCodeViewSubsection(State.strings(), Frames);
+  if (!Result)
+    return Result.takeError();
+  Subsection.Subsection = *Result;
+  return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitCOFFSymbolRVAs(
+    DebugSymbolRVASubsectionRef &RVAs, const DebugSubsectionState &State) {
+  auto Result = YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(RVAs);
   if (!Result)
     return Result.takeError();
   Subsection.Subsection = *Result;
@@ -476,9 +931,25 @@ Expected<YAMLDebugSubsection> YAMLDebugSubsection::fromCodeViewSubection(
     const DebugStringTableSubsectionRef &Strings,
     const DebugChecksumsSubsectionRef &Checksums,
     const DebugSubsectionRecord &SS) {
-  SubsectionConversionVisitor V(Strings, Checksums);
-  if (auto EC = visitDebugSubsection(SS, V))
+  DebugSubsectionState State(Strings, Checksums);
+  SubsectionConversionVisitor V;
+  if (auto EC = visitDebugSubsection(SS, V, State))
     return std::move(EC);
 
   return V.Subsection;
 }
+
+std::unique_ptr<DebugStringTableSubsection>
+llvm::CodeViewYAML::findStringTable(ArrayRef<YAMLDebugSubsection> Sections) {
+  for (const auto &SS : Sections) {
+    if (SS.Subsection->Kind != DebugSubsectionKind::StringTable)
+      continue;
+
+    // The string table doesn't use the allocator; only the first one is used.
+    BumpPtrAllocator Allocator;
+    auto Result =
+        SS.Subsection->toCodeViewSubsection(Allocator, nullptr, nullptr);
+    return llvm::cast<DebugStringTableSubsection>(std::move(Result));
+  }
+  return nullptr; // No string table subsection among Sections.
+}
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index bd97af3a9323..fa3f1e0b60aa 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -47,6 +47,18 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(RegisterId)
 LLVM_YAML_DECLARE_ENUM_TRAITS(TrampolineType)
 LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal)
 
+LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, TypeName)
+
+LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, true)
+
+StringRef ScalarTraits<TypeName>::input(StringRef S, void *V, TypeName &T) {
+  return ScalarTraits<StringRef>::input(S, V, T.value);
+}
+void ScalarTraits<TypeName>::output(const TypeName &T, void *V,
+                                    llvm::raw_ostream &R) {
+  ScalarTraits<StringRef>::output(T.value, V, R);
+}
+
 void ScalarEnumerationTraits<SymbolKind>::enumeration(IO &io,
                                                       SymbolKind &Value) {
   auto SymbolNames = getSymbolTypeNames();
@@ -264,6 +276,7 @@ template <> void SymbolRecordImpl<InlineSiteSym>::map(IO &IO) {
 template <> void SymbolRecordImpl<LocalSym>::map(IO &IO) {
   IO.mapRequired("Type", Symbol.Type);
   IO.mapRequired("Flags", Symbol.Flags);
+
   IO.mapRequired("VarName", Symbol.Name);
 }
 
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 4e82a299a672..1302b0713d0e 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -20,6 +20,7 @@
 #include "llvm/DebugInfo/CodeView/EnumTables.h"
 #include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
 #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/Support/BinaryStreamWriter.h"
 
 using namespace llvm;
 using namespace llvm::codeview;
@@ -66,7 +67,7 @@ struct LeafRecordBase {
 
   virtual ~LeafRecordBase() {}
   virtual void map(yaml::IO &io) = 0;
-  virtual CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const = 0;
+  virtual CVType toCodeViewRecord(TypeTableBuilder &TTB) const = 0;
   virtual Error fromCodeViewRecord(CVType Type) = 0;
 };
 
@@ -80,10 +81,9 @@ template <typename T> struct LeafRecordImpl : public LeafRecordBase {
     return TypeDeserializer::deserializeAs<T>(Type, Record);
   }
 
-  CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const override {
-    TypeTableBuilder Table(Allocator);
-    Table.writeKnownType(Record);
-    return CVType(Kind, Table.records().front());
+  CVType toCodeViewRecord(TypeTableBuilder &TTB) const override {
+    TTB.writeKnownType(Record);
+    return CVType(Kind, TTB.records().back());
   }
 
   mutable T Record;
@@ -93,7 +93,7 @@ template <> struct LeafRecordImpl<FieldListRecord> : public LeafRecordBase {
   explicit LeafRecordImpl(TypeLeafKind K) : LeafRecordBase(K) {}
 
   void map(yaml::IO &io) override;
-  CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const override;
+  CVType toCodeViewRecord(TypeTableBuilder &TTB) const override;
   Error fromCodeViewRecord(CVType Type) override;
 
   std::vector<MemberRecord> Members;
@@ -440,16 +440,15 @@ Error LeafRecordImpl<FieldListRecord>::fromCodeViewRecord(CVType Type) {
   return visitMemberRecordStream(Type.content(), V);
 }
 
-CVType LeafRecordImpl<FieldListRecord>::toCodeViewRecord(
-    BumpPtrAllocator &Allocator) const {
-  TypeTableBuilder TTB(Allocator);
+CVType
+LeafRecordImpl<FieldListRecord>::toCodeViewRecord(TypeTableBuilder &TTB) const {
   FieldListRecordBuilder FLRB(TTB);
   FLRB.begin();
   for (const auto &Member : Members) {
     Member.Member->writeTo(FLRB);
   }
   FLRB.end(true);
-  return CVType(Kind, TTB.records().front());
+  return CVType(Kind, TTB.records().back());
 }
 
 void MappingTraits<OneMethodRecord>::mapping(IO &io, OneMethodRecord &Record) {
@@ -634,8 +633,13 @@ Expected<LeafRecord> LeafRecord::fromCodeViewRecord(CVType Type) {
   return make_error<CodeViewError>(cv_error_code::corrupt_record);
 }
 
-CVType LeafRecord::toCodeViewRecord(BumpPtrAllocator &Allocator) const {
-  return Leaf->toCodeViewRecord(Allocator);
+CVType LeafRecord::toCodeViewRecord(BumpPtrAllocator &Alloc) const {
+  TypeTableBuilder TTB(Alloc);
+  return Leaf->toCodeViewRecord(TTB);
+}
+
+CVType LeafRecord::toCodeViewRecord(TypeTableBuilder &TTB) const {
+  return Leaf->toCodeViewRecord(TTB);
 }
 
 namespace llvm {
diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp
index 1aa1519b708b..91c928771a65 100644
--- a/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/lib/ObjectYAML/DWARFEmitter.cpp
@@ -16,8 +16,8 @@
 #include "llvm/ObjectYAML/DWARFYAML.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
 
 #include "DWARFVisitor.h"
 
diff --git a/lib/ObjectYAML/DWARFVisitor.h b/lib/ObjectYAML/DWARFVisitor.h
index 263e36220a05..81ef412eb7e6 100644
--- a/lib/ObjectYAML/DWARFVisitor.h
+++ b/lib/ObjectYAML/DWARFVisitor.h
@@ -13,7 +13,7 @@
 #define LLVM_OBJECTYAML_DWARFVISITOR_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index 3052901da45c..70e25ea504a0 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -424,12 +424,6 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
   case ELF::EM_ARM:
     BCase(SHF_ARM_PURECODE);
     break;
-  case ELF::EM_AMDGPU:
-    BCase(SHF_AMDGPU_HSA_GLOBAL);
-    BCase(SHF_AMDGPU_HSA_READONLY);
-    BCase(SHF_AMDGPU_HSA_CODE);
-    BCase(SHF_AMDGPU_HSA_AGENT);
-    break;
   case ELF::EM_HEXAGON:
     BCase(SHF_HEX_GPREL);
     break;
@@ -513,35 +507,35 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
 #define ELF_RELOC(X, Y) IO.enumCase(Value, #X, ELF::X);
   switch (Object->Header.Machine) {
   case ELF::EM_X86_64:
-#include "llvm/Support/ELFRelocs/x86_64.def"
+#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
     break;
   case ELF::EM_MIPS:
-#include "llvm/Support/ELFRelocs/Mips.def"
+#include "llvm/BinaryFormat/ELFRelocs/Mips.def"
     break;
   case ELF::EM_HEXAGON:
-#include "llvm/Support/ELFRelocs/Hexagon.def"
+#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def"
     break;
   case ELF::EM_386:
   case ELF::EM_IAMCU:
-#include "llvm/Support/ELFRelocs/i386.def"
+#include "llvm/BinaryFormat/ELFRelocs/i386.def"
     break;
   case ELF::EM_AARCH64:
-#include "llvm/Support/ELFRelocs/AArch64.def"
+#include "llvm/BinaryFormat/ELFRelocs/AArch64.def"
     break;
   case ELF::EM_ARM:
-#include "llvm/Support/ELFRelocs/ARM.def"
+#include "llvm/BinaryFormat/ELFRelocs/ARM.def"
     break;
   case ELF::EM_RISCV:
-#include "llvm/Support/ELFRelocs/RISCV.def"
+#include "llvm/BinaryFormat/ELFRelocs/RISCV.def"
     break;
   case ELF::EM_LANAI:
-#include "llvm/Support/ELFRelocs/Lanai.def"
+#include "llvm/BinaryFormat/ELFRelocs/Lanai.def"
     break;
   case ELF::EM_AMDGPU:
-#include "llvm/Support/ELFRelocs/AMDGPU.def"
+#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def"
     break;
   case ELF::EM_BPF:
-#include "llvm/Support/ELFRelocs/BPF.def"
+#include "llvm/BinaryFormat/ELFRelocs/BPF.def"
     break;
   default:
     llvm_unreachable("Unsupported architecture");
diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp
index 6b0e4e3762d0..461684827872 100644
--- a/lib/ObjectYAML/MachOYAML.cpp
+++ b/lib/ObjectYAML/MachOYAML.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ObjectYAML/MachOYAML.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Host.h"
-#include "llvm/Support/MachO.h"
 
 #include <string.h> // For memcpy, memset and strnlen.
 
@@ -252,7 +252,7 @@ void MappingTraits<MachOYAML::LoadCommand>::mapping(
     break;
 
   switch (LoadCommand.Data.load_command_data.cmd) {
-#include "llvm/Support/MachO.def"
+#include "llvm/BinaryFormat/MachO.def"
   }
   IO.mapOptional("PayloadBytes", LoadCommand.PayloadBytes);
   IO.mapOptional("ZeroPadBytes", LoadCommand.ZeroPadBytes, (uint64_t)0ull);
diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp
index 74581c1ecaac..4b7154ebb7c1 100644
--- a/lib/ObjectYAML/ObjectYAML.cpp
+++ b/lib/ObjectYAML/ObjectYAML.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ObjectYAML/YAML.h"
 #include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/ObjectYAML/YAML.h"
 
 using namespace llvm;
 using namespace yaml;
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 910d32f16af9..353d027f4e11 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -366,7 +366,7 @@ void ScalarEnumerationTraits<WasmYAML::TableType>::enumeration(
 void ScalarEnumerationTraits<WasmYAML::RelocType>::enumeration(
     IO &IO, WasmYAML::RelocType &Type) {
 #define WASM_RELOC(name, value) IO.enumCase(Type, #name, wasm::name);
-#include "llvm/Support/WasmRelocs/WebAssembly.def"
+#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def"
 #undef WASM_RELOC
 }
 
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
index 3e8a1d802314..e416df6a38dc 100644
--- a/lib/Option/Arg.cpp
+++ b/lib/Option/Arg.cpp
@@ -12,8 +12,8 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 using namespace llvm::opt;
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 17c60348633c..1f638e768307 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -164,6 +164,10 @@ static cl::opt<bool> EnableGVNHoist(
     "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
     cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
 
+static cl::opt<bool> EnableGVNSink(
+    "enable-npm-gvn-sink", cl::init(false), cl::Hidden,
+    cl::desc("Enable the GVN sinking pass for the new PM (default = off)"));
+
 static Regex DefaultAliasRegex(
     "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
 
@@ -314,6 +318,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   if (EnableGVNHoist)
     FPM.addPass(GVNHoistPass());
 
+  // Global value numbering based sinking.
+  if (EnableGVNSink) {
+    FPM.addPass(GVNSinkPass());
+    FPM.addPass(SimplifyCFGPass());
+  }
+
   // Speculative execution if the target has divergent branches; otherwise nop.
   FPM.addPass(SpeculativeExecutionPass());
 
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index 23999a5312c7..015b3c6c2021 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/Coverage/CoverageMapping.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
@@ -19,7 +20,6 @@
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ProfileData/Coverage/CoverageMapping.h"
 #include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index f131be2cba49..6fe93530da21 100644
--- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index a2b7c94f9dec..c9b82c303e33 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -29,7 +30,6 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
-#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -45,8 +45,8 @@
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
-#include <cstring>
 #include <cstdint>
+#include <cstring>
 #include <memory>
 #include <string>
 #include <system_error>
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index 856f793363f7..d9f599f400da 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/InstrProf.h"
-#include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 6b7bd3b2fc0a..b3402a6ea956 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -12,11 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/InstrProfWriter.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/InstrProf.h"
-#include "llvm/ProfileData/InstrProfWriter.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp
index b05efa7417b9..b45026140c99 100644
--- a/lib/ProfileData/SampleProfWriter.cpp
+++ b/lib/ProfileData/SampleProfWriter.cpp
@@ -18,10 +18,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
-#include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/LEB128.h"
diff --git a/lib/Support/AMDGPUCodeObjectMetadata.cpp b/lib/Support/AMDGPUCodeObjectMetadata.cpp
new file mode 100644
index 000000000000..a00e371415a3
--- /dev/null
+++ b/lib/Support/AMDGPUCodeObjectMetadata.cpp
@@ -0,0 +1,218 @@
+//===--- AMDGPUCodeObjectMetadata.cpp ---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata definitions and in-memory
+/// representations.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::CodeObject;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+
+namespace llvm {
+namespace yaml {
+
+template <>
+struct ScalarEnumerationTraits<AccessQualifier> {
+  static void enumeration(IO &YIO, AccessQualifier &EN) {
+    YIO.enumCase(EN, "Default", AccessQualifier::Default);
+    YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
+    YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
+    YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<AddressSpaceQualifier> {
+  static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
+    YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
+    YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
+    YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
+    YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
+    YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
+    YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<ValueKind> {
+  static void enumeration(IO &YIO, ValueKind &EN) {
+    YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
+    YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
+    YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
+    YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
+    YIO.enumCase(EN, "Image", ValueKind::Image);
+    YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
+    YIO.enumCase(EN, "Queue", ValueKind::Queue);
+    YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
+    YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
+    YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
+    YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
+    YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+    YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
+    YIO.enumCase(EN, "HiddenCompletionAction",
+                 ValueKind::HiddenCompletionAction);
+  }
+};
+
+template <>
+struct ScalarEnumerationTraits<ValueType> {
+  static void enumeration(IO &YIO, ValueType &EN) {
+    YIO.enumCase(EN, "Struct", ValueType::Struct);
+    YIO.enumCase(EN, "I8", ValueType::I8);
+    YIO.enumCase(EN, "U8", ValueType::U8);
+    YIO.enumCase(EN, "I16", ValueType::I16);
+    YIO.enumCase(EN, "U16", ValueType::U16);
+    YIO.enumCase(EN, "F16", ValueType::F16);
+    YIO.enumCase(EN, "I32", ValueType::I32);
+    YIO.enumCase(EN, "U32", ValueType::U32);
+    YIO.enumCase(EN, "F32", ValueType::F32);
+    YIO.enumCase(EN, "I64", ValueType::I64);
+    YIO.enumCase(EN, "U64", ValueType::U64);
+    YIO.enumCase(EN, "F64", ValueType::F64);
+  }
+};
+
+template <>
+struct MappingTraits<Kernel::Attrs::Metadata> {
+  static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
+    YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
+                    MD.mReqdWorkGroupSize, std::vector<uint32_t>());
+    YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
+                    MD.mWorkGroupSizeHint, std::vector<uint32_t>());
+    YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
+                    MD.mVecTypeHint, std::string());
+  }
+};
+
+template <>
+struct MappingTraits<Kernel::Arg::Metadata> {
+  static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
+    YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
+    YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
+    YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind);
+    YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
+    YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
+                    uint32_t(0));
+    YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
+                    AccessQualifier::Unknown);
+    YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
+                    AddressSpaceQualifier::Unknown);
+    YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
+    YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
+    YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
+    YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
+    YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
+    YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
+  }
+};
+
+template <>
+struct MappingTraits<Kernel::CodeProps::Metadata> {
+  static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
+    YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
+                    MD.mKernargSegmentSize, uint64_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
+                    MD.mWorkgroupGroupSegmentSize, uint32_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
+                    MD.mWorkitemPrivateSegmentSize, uint32_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
+                    MD.mWavefrontNumSGPRs, uint16_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
+                    MD.mWorkitemNumVGPRs, uint16_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
+                    MD.mKernargSegmentAlign, uint8_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
+                    MD.mGroupSegmentAlign, uint8_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
+                    MD.mPrivateSegmentAlign, uint8_t(0));
+    YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
+                    MD.mWavefrontSize, uint8_t(0));
+  }
+};
+
+template <>
+struct MappingTraits<Kernel::DebugProps::Metadata> {
+  static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) {
+    YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
+                    MD.mDebuggerABIVersion, std::vector<uint32_t>());
+    YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
+                    MD.mReservedNumVGPRs, uint16_t(0));
+    YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
+                    MD.mReservedFirstVGPR, uint16_t(-1));
+    YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
+                    MD.mPrivateSegmentBufferSGPR, uint16_t(-1));
+    YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
+                    MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
+  }
+};
+
+template <>
+struct MappingTraits<Kernel::Metadata> {
+  static void mapping(IO &YIO, Kernel::Metadata &MD) {
+    YIO.mapRequired(Kernel::Key::Name, MD.mName);
+    YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
+    YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
+                    std::vector<uint32_t>());
+    if (!MD.mAttrs.empty() || !YIO.outputting())
+      YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
+    if (!MD.mArgs.empty() || !YIO.outputting())
+      YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
+    if (!MD.mCodeProps.empty() || !YIO.outputting())
+      YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
+    if (!MD.mDebugProps.empty() || !YIO.outputting())
+      YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps);
+  }
+};
+
+template <>
+struct MappingTraits<CodeObject::Metadata> {
+  static void mapping(IO &YIO, CodeObject::Metadata &MD) {
+    YIO.mapRequired(Key::Version, MD.mVersion);
+    YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
+    if (!MD.mKernels.empty() || !YIO.outputting())
+      YIO.mapOptional(Key::Kernels, MD.mKernels);
+  }
+};
+
+} // end namespace yaml
+
+namespace AMDGPU {
+namespace CodeObject {
+
+/* static */ // Parses YamlString into CodeObjectMetadata via yaml::Input.
+std::error_code Metadata::fromYamlString(
+    std::string YamlString, Metadata &CodeObjectMetadata) {
+  yaml::Input YamlInput(YamlString);
+  YamlInput >> CodeObjectMetadata;
+  return YamlInput.error(); // Reports whatever error the YAML input recorded.
+}
+
+/* static */ // Emits CodeObjectMetadata as YAML text through YamlString.
+std::error_code Metadata::toYamlString(
+    Metadata CodeObjectMetadata, std::string &YamlString) {
+  raw_string_ostream YamlStream(YamlString);
+  yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
+  YamlOutput << CodeObjectMetadata;
+  return std::error_code(); // Always returns a default-constructed error_code.
+}
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp
index 63e800a5b78b..a9a0c1d1a4d3 100644
--- a/lib/Support/ARMAttributeParser.cpp
+++ b/lib/Support/ARMAttributeParser.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/ScopedPrinter.h"
 
diff --git a/lib/Support/ARMBuildAttrs.cpp b/lib/Support/ARMBuildAttrs.cpp
index 134ef8b587b7..8f18e9eb24ed 100644
--- a/lib/Support/ARMBuildAttrs.cpp
+++ b/lib/Support/ARMBuildAttrs.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ARMBuildAttributes.h"
 
 using namespace llvm;
 
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index 80550e2b46a7..55910c489faf 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -18,6 +18,8 @@ using namespace llvm;
 
 #if defined(_MSC_VER)
 #include <Intrin.h>
+
+// We must include windows.h after Intrin.h.
 #include <windows.h>
 #undef MemoryFence
 #endif
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index a12ba4fbfda8..0a8e3897cce9 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -30,6 +30,7 @@ elseif( CMAKE_HOST_UNIX )
 endif( MSVC OR MINGW )
 
 add_llvm_library(LLVMSupport
+  AMDGPUCodeObjectMetadata.cpp
   APFloat.cpp
   APInt.cpp
   APSInt.cpp
@@ -57,7 +58,6 @@ add_llvm_library(LLVMSupport
   DebugCounter.cpp
   DeltaAlgorithm.cpp
   DAGDeltaAlgorithm.cpp
-  Dwarf.cpp
   Error.cpp
   ErrorHandling.cpp
   FileUtilities.cpp
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 34345901eab1..de0ca940b405 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -2042,9 +2042,9 @@ void CommandLineParser::printOptionValues() {
     Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
 }
 
-static void (*OverrideVersionPrinter)() = nullptr;
+static VersionPrinterTy OverrideVersionPrinter = nullptr;
 
-static std::vector<void (*)()> *ExtraVersionPrinters = nullptr;
+static std::vector<VersionPrinterTy> *ExtraVersionPrinters = nullptr;
 
 namespace {
 class VersionPrinter {
@@ -2084,7 +2084,7 @@ public:
       return;
 
     if (OverrideVersionPrinter != nullptr) {
-      (*OverrideVersionPrinter)();
+      OverrideVersionPrinter(outs());
       exit(0);
     }
     print();
@@ -2093,10 +2093,8 @@ public:
     // information.
     if (ExtraVersionPrinters != nullptr) {
       outs() << '\n';
-      for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(),
-                                             E = ExtraVersionPrinters->end();
-           I != E; ++I)
-        (*I)();
+      for (auto I : *ExtraVersionPrinters)
+        I(outs());
     }
 
     exit(0);
@@ -2134,11 +2132,11 @@ void cl::PrintHelpMessage(bool Hidden, bool Categorized) {
 /// Utility function for printing version number.
 void cl::PrintVersionMessage() { VersionPrinterInstance.print(); }
 
-void cl::SetVersionPrinter(void (*func)()) { OverrideVersionPrinter = func; }
+void cl::SetVersionPrinter(VersionPrinterTy func) { OverrideVersionPrinter = func; }
 
-void cl::AddExtraVersionPrinter(void (*func)()) {
+void cl::AddExtraVersionPrinter(VersionPrinterTy func) {
   if (!ExtraVersionPrinters)
-    ExtraVersionPrinters = new std::vector<void (*)()>;
+    ExtraVersionPrinters = new std::vector<VersionPrinterTy>;
 
   ExtraVersionPrinters->push_back(func);
 }
diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp
index aa9507c189ed..e56854a3ae42 100644
--- a/lib/Support/ConvertUTF.cpp
+++ b/lib/Support/ConvertUTF.cpp
@@ -46,14 +46,12 @@
 
 ------------------------------------------------------------------------ */
 
-
 #include "llvm/Support/ConvertUTF.h"
 #ifdef CVTUTF_DEBUG
 #include <stdio.h>
 #endif
 #include <assert.h>
 
-
 /*
  * This code extensively uses fall-through switches.
  * Keep the compiler from warning about that.
diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp
index 217cedb24df6..6cb4f6376250 100644
--- a/lib/Support/ConvertUTFWrapper.cpp
+++ b/lib/Support/ConvertUTFWrapper.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/ConvertUTF.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SwapByteOrder.h"
 #include <string>
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
index 3ba2a1277d05..10be9b391b49 100644
--- a/lib/Support/Errno.cpp
+++ b/lib/Support/Errno.cpp
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Errno.h"
-#include "llvm/Config/config.h"     // Get autoconf configuration settings
+#include "llvm/Config/config.h" // Get autoconf configuration settings
 #include "llvm/Support/raw_ostream.h"
 #include <string.h>
 
diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp
index 4730c0b26ba0..bb02c03ff2b6 100644
--- a/lib/Support/Error.cpp
+++ b/lib/Support/Error.cpp
@@ -13,7 +13,6 @@
 #include "llvm/Support/ManagedStatic.h"
 #include <system_error>
 
-
 using namespace llvm;
 
 namespace {
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index c01659604444..a9f4409f5dde 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 8be9879fbc24..3ee3af7731e6 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -15,15 +15,15 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cerrno>
 #include <ctime>
 #include <memory>
-#include <tuple>
-#include <system_error>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <system_error>
+#include <tuple>
 #if LLVM_ON_WIN32
 #include <windows.h>
 #endif
diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp
index bdbf1d677938..545a64cfc767 100644
--- a/lib/Support/MD5.cpp
+++ b/lib/Support/MD5.cpp
@@ -37,11 +37,11 @@
  * compile-time configuration.
  */
 
+#include "llvm/Support/MD5.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
 #include <array>
 #include <cstdint>
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index c8d3844d0c96..bdd02105f6f0 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"
 #include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only TRULY operating system
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index 80bef558258d..e58f856ca244 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -13,12 +13,12 @@
 
 #include "llvm/Support/Path.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/COFF.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/Process.h"
 #include <cctype>
 #include <cstring>
@@ -1027,178 +1027,6 @@ void directory_entry::replace_filename(const Twine &filename, file_status st) {
   Status = st;
 }
 
-template <size_t N>
-static bool startswith(StringRef Magic, const char (&S)[N]) {
-  return Magic.startswith(StringRef(S, N - 1));
-}
-
-/// @brief Identify the magic in magic.
-file_magic identify_magic(StringRef Magic) {
-  if (Magic.size() < 4)
-    return file_magic::unknown;
-  switch ((unsigned char)Magic[0]) {
-    case 0x00: {
-      // COFF bigobj, CL.exe's LTO object file, or short import library file
-      if (startswith(Magic, "\0\0\xFF\xFF")) {
-        size_t MinSize = offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
-        if (Magic.size() < MinSize)
-          return file_magic::coff_import_library;
-
-        const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
-        if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
-          return file_magic::coff_object;
-        if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
-          return file_magic::coff_cl_gl_object;
-        return file_magic::coff_import_library;
-      }
-      // Windows resource file
-      if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF"))
-        return file_magic::windows_resource;
-      // 0x0000 = COFF unknown machine type
-      if (Magic[1] == 0)
-        return file_magic::coff_object;
-      if (startswith(Magic, "\0asm"))
-        return file_magic::wasm_object;
-      break;
-    }
-    case 0xDE:  // 0x0B17C0DE = BC wraper
-      if (startswith(Magic, "\xDE\xC0\x17\x0B"))
-        return file_magic::bitcode;
-      break;
-    case 'B':
-      if (startswith(Magic, "BC\xC0\xDE"))
-        return file_magic::bitcode;
-      break;
-    case '!':
-      if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
-        return file_magic::archive;
-      break;
-
-    case '\177':
-      if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
-        bool Data2MSB = Magic[5] == 2;
-        unsigned high = Data2MSB ? 16 : 17;
-        unsigned low  = Data2MSB ? 17 : 16;
-        if (Magic[high] == 0) {
-          switch (Magic[low]) {
-            default: return file_magic::elf;
-            case 1: return file_magic::elf_relocatable;
-            case 2: return file_magic::elf_executable;
-            case 3: return file_magic::elf_shared_object;
-            case 4: return file_magic::elf_core;
-          }
-        }
-        // It's still some type of ELF file.
-        return file_magic::elf;
-      }
-      break;
-
-    case 0xCA:
-      if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
-          startswith(Magic, "\xCA\xFE\xBA\xBF")) {
-        // This is complicated by an overlap with Java class files.
-        // See the Mach-O section in /usr/share/file/magic for details.
-        if (Magic.size() >= 8 && Magic[7] < 43)
-          return file_magic::macho_universal_binary;
-      }
-      break;
-
-      // The two magic numbers for mach-o are:
-      // 0xfeedface - 32-bit mach-o
-      // 0xfeedfacf - 64-bit mach-o
-    case 0xFE:
-    case 0xCE:
-    case 0xCF: {
-      uint16_t type = 0;
-      if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
-          startswith(Magic, "\xFE\xED\xFA\xCF")) {
-        /* Native endian */
-        size_t MinSize;
-        if (Magic[3] == char(0xCE))
-          MinSize = sizeof(MachO::mach_header);
-        else
-          MinSize = sizeof(MachO::mach_header_64);
-        if (Magic.size() >= MinSize)
-          type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
-      } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
-                 startswith(Magic, "\xCF\xFA\xED\xFE")) {
-        /* Reverse endian */
-        size_t MinSize;
-        if (Magic[0] == char(0xCE))
-          MinSize = sizeof(MachO::mach_header);
-        else
-          MinSize = sizeof(MachO::mach_header_64);
-        if (Magic.size() >= MinSize)
-          type = Magic[15] << 24 | Magic[14] << 12 |Magic[13] << 8 | Magic[12];
-      }
-      switch (type) {
-        default: break;
-        case 1: return file_magic::macho_object;
-        case 2: return file_magic::macho_executable;
-        case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
-        case 4: return file_magic::macho_core;
-        case 5: return file_magic::macho_preload_executable;
-        case 6: return file_magic::macho_dynamically_linked_shared_lib;
-        case 7: return file_magic::macho_dynamic_linker;
-        case 8: return file_magic::macho_bundle;
-        case 9: return file_magic::macho_dynamically_linked_shared_lib_stub;
-        case 10: return file_magic::macho_dsym_companion;
-        case 11: return file_magic::macho_kext_bundle;
-      }
-      break;
-    }
-    case 0xF0: // PowerPC Windows
-    case 0x83: // Alpha 32-bit
-    case 0x84: // Alpha 64-bit
-    case 0x66: // MPS R4000 Windows
-    case 0x50: // mc68K
-    case 0x4c: // 80386 Windows
-    case 0xc4: // ARMNT Windows
-      if (Magic[1] == 0x01)
-        return file_magic::coff_object;
-      LLVM_FALLTHROUGH;
-
-    case 0x90: // PA-RISC Windows
-    case 0x68: // mc68K Windows
-      if (Magic[1] == 0x02)
-        return file_magic::coff_object;
-      break;
-
-    case 'M': // Possible MS-DOS stub on Windows PE file
-      if (startswith(Magic, "MZ")) {
-        uint32_t off = read32le(Magic.data() + 0x3c);
-        // PE/COFF file, either EXE or DLL.
-        if (off < Magic.size() &&
-            memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0)
-          return file_magic::pecoff_executable;
-      }
-      break;
-
-    case 0x64: // x86-64 Windows.
-      if (Magic[1] == char(0x86))
-        return file_magic::coff_object;
-      break;
-
-    default:
-      break;
-  }
-  return file_magic::unknown;
-}
-
-std::error_code identify_magic(const Twine &Path, file_magic &Result) {
-  int FD;
-  if (std::error_code EC = openFileForRead(Path, FD))
-    return EC;
-
-  char Buffer[32];
-  int Length = read(FD, Buffer, sizeof(Buffer));
-  if (close(FD) != 0 || Length < 0)
-    return std::error_code(errno, std::generic_category());
-
-  Result = identify_magic(StringRef(Buffer, Length));
-  return std::error_code();
-}
-
 std::error_code directory_entry::status(file_status &result) const {
   return fs::status(Path, result, FollowSymlinks);
 }
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index abf61b73a70d..a18e9cc50040 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -15,7 +15,7 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm-c/ErrorHandling.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Config/config.h"     // Get autoconf configuration settings
+#include "llvm/Config/config.h" // Get autoconf configuration settings
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/Watchdog.h"
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index 290c30f4968f..caec993ee165 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/Process.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 
 using namespace llvm;
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index 6c9781c4e2d6..83c6d1d52b4c 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"
 #include "llvm/Support/RWMutex.h"
+#include "llvm/Config/config.h"
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only TRULY operating system
diff --git a/lib/Support/SHA1.cpp b/lib/Support/SHA1.cpp
index 0eefd998cd75..20f41c5ff447 100644
--- a/lib/Support/SHA1.cpp
+++ b/lib/Support/SHA1.cpp
@@ -15,9 +15,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Host.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Host.h"
 using namespace llvm;
 
 #include <stdint.h>
diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp
index 57f36bf175b3..256a22dee87b 100644
--- a/lib/Support/Signals.cpp
+++ b/lib/Support/Signals.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/Signals.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h"
@@ -23,18 +24,23 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Program.h"
-#include "llvm/Support/Signals.h"
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Options.h"
 #include <vector>
 
-namespace llvm {
-
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only TRULY operating system
 //===          independent code.
 //===----------------------------------------------------------------------===//
 
+using namespace llvm;
+
+static cl::opt<bool>
+    DisableSymbolication("disable-symbolication",
+                         cl::desc("Disable symbolizing crash backtraces."),
+                         cl::init(false), cl::Hidden);
+
 static ManagedStatic<std::vector<std::pair<void (*)(void *), void *>>>
     CallBacksToRun;
 void sys::RunSignalHandlers() {
@@ -44,9 +50,6 @@ void sys::RunSignalHandlers() {
     I.first(I.second);
   CallBacksToRun->clear();
 }
-}
-
-using namespace llvm;
 
 static bool findModulesAndOffsets(void **StackTrace, int Depth,
                                   const char **Modules, intptr_t *Offsets,
@@ -70,6 +73,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0,
 static bool printSymbolizedStackTrace(StringRef Argv0,
                                       void **StackTrace, int Depth,
                                       llvm::raw_ostream &OS) {
+  if (DisableSymbolication)
+    return false;
+
   // Don't recursively invoke the llvm-symbolizer binary.
   if (Argv0.find("llvm-symbolizer") != std::string::npos)
     return false;
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 5199fad7d9e9..b0609d4fe047 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -13,18 +13,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/Locale.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp
index df524b352351..05886eaa8aee 100644
--- a/lib/Support/SpecialCaseList.cpp
+++ b/lib/Support/SpecialCaseList.cpp
@@ -15,12 +15,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/SpecialCaseList.h"
-#include "llvm/Support/TrigramIndex.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Regex.h"
+#include "llvm/Support/TrigramIndex.h"
 #include <string>
 #include <system_error>
 #include <utility>
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 0c50dfd27d61..72ca22806c43 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -30,8 +30,8 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstring>
 using namespace llvm;
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index 3e2420f67760..b2f42dfcc04d 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 /// StrInStrNoCase - Portable version of strcasestr.  Locates the first
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index bed9ed64f802..b5c283253117 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -114,7 +114,7 @@ static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS,
   return LHS->first.compare(RHS->first);
 }
 
-void TargetRegistry::printRegisteredTargetsForVersion() {
+void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) {
   std::vector<std::pair<StringRef, const Target*> > Targets;
   size_t Width = 0;
   for (const auto &T : TargetRegistry::targets()) {
@@ -123,7 +123,6 @@ void TargetRegistry::printRegisteredTargetsForVersion() {
   }
   array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn);
 
-  raw_ostream &OS = outs();
   OS << "  Registered Targets:\n";
   for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
     OS << "    " << Targets[i].first;
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index 9da1603080a2..9a75c02b351f 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/ThreadLocal.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/ThreadLocal.h"
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only TRULY operating system
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index dec6baf7bf47..3386f2660f31 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -20,8 +20,8 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Process.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 // This ugly hack is brought to you courtesy of constructor/destructor ordering
diff --git a/lib/Support/TrigramIndex.cpp b/lib/Support/TrigramIndex.cpp
index 85ab5287566b..721763c88525 100644
--- a/lib/Support/TrigramIndex.cpp
+++ b/lib/Support/TrigramIndex.cpp
@@ -18,9 +18,9 @@
 #include "llvm/Support/TrigramIndex.h"
 #include "llvm/ADT/SmallVector.h"
 
-#include <unordered_map>
 #include <set>
 #include <string>
+#include <unordered_map>
 
 using namespace llvm;
 
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index f7b7ad89e959..320aede79fbb 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -12,8 +12,8 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/TargetParser.h"
 #include <cstring>
 using namespace llvm;
 
@@ -877,6 +877,10 @@ std::string Triple::normalize(StringRef Str) {
     }
   }
 
+  // SUSE uses "gnueabi" to mean "gnueabihf"
+  if (Vendor == Triple::SUSE && Environment == llvm::Triple::GNUEABI)
+    Components[3] = "gnueabihf";
+
   if (OS == Triple::Win32) {
     Components.resize(4);
     Components[2] = "windows";
@@ -1484,6 +1488,21 @@ bool Triple::isLittleEndian() const {
 }
 
 bool Triple::isCompatibleWith(const Triple &Other) const {
+  // ARM and Thumb triples are compatible, if subarch, vendor and OS match.
+  if ((getArch() == Triple::thumb && Other.getArch() == Triple::arm) ||
+      (getArch() == Triple::arm && Other.getArch() == Triple::thumb) ||
+      (getArch() == Triple::thumbeb && Other.getArch() == Triple::armeb) ||
+      (getArch() == Triple::armeb && Other.getArch() == Triple::thumbeb)) {
+    if (getVendor() == Triple::Apple)
+      return getSubArch() == Other.getSubArch() &&
+             getVendor() == Other.getVendor() && getOS() == Other.getOS();
+    else
+      return getSubArch() == Other.getSubArch() &&
+             getVendor() == Other.getVendor() && getOS() == Other.getOS() &&
+             getEnvironment() == Other.getEnvironment() &&
+             getObjectFormat() == Other.getObjectFormat();
+  }
+
   // If vendor is apple, ignore the version number.
   if (getVendor() == Triple::Apple)
     return getArch() == Other.getArch() && getSubArch() == Other.getSubArch() &&
diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc
index a0526fa2c1b8..aad77f19c35a 100644
--- a/lib/Support/Unix/DynamicLibrary.inc
+++ b/lib/Support/Unix/DynamicLibrary.inc
@@ -15,7 +15,8 @@
 #include <dlfcn.h>
 
 DynamicLibrary::HandleSet::~HandleSet() {
-  for (void *Handle : Handles)
+  // Close the libraries in reverse order.
+  for (void *Handle : llvm::reverse(Handles))
     ::dlclose(Handle);
   if (Process)
     ::dlclose(Process);
@@ -101,10 +102,10 @@ static void *DoSearch(const char* SymbolName) {
 #define EXPLICIT_SYMBOL(SYM) \
    if (!strcmp(SymbolName, #SYM)) return &SYM
 
-// On linux we have a weird situation. The stderr/out/in symbols are both
+// Under glibc we have a weird situation. The stderr/out/in symbols are both
 // macros and global variables because of standards requirements. So, we
 // boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
-#if defined(__linux__) and !defined(__ANDROID__)
+#if defined(__GLIBC__)
   {
     EXPLICIT_SYMBOL(stderr);
     EXPLICIT_SYMBOL(stdout);
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index ce638d453c19..b6774692595b 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -75,8 +75,8 @@
 #define STATVFS_F_FRSIZE(vfs) vfs.f_frsize
 #else
 #if defined(__OpenBSD__) || defined(__FreeBSD__)
-#include <sys/param.h>
 #include <sys/mount.h>
+#include <sys/param.h>
 #elif defined(__linux__)
 #if defined(HAVE_LINUX_MAGIC_H)
 #include <linux/magic.h>
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 88ad21e9806e..aaf760c5b616 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -15,9 +15,9 @@
 #include "Unix.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Demangle/Demangle.h"
-#include "llvm/Support/Format.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Program.h"
diff --git a/lib/Support/Unix/Threading.inc b/lib/Support/Unix/Threading.inc
index 407b194e1b6a..267af388ecdb 100644
--- a/lib/Support/Unix/Threading.inc
+++ b/lib/Support/Unix/Threading.inc
@@ -26,19 +26,19 @@
 #endif
 
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <errno.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
-#include <errno.h>
 #include <unistd.h>
 #endif
 
 #if defined(__NetBSD__)
-#include <lwp.h>  // For _lwp_self()
+#include <lwp.h> // For _lwp_self()
 #endif
 
 #if defined(__linux__)
-#include <unistd.h> // For syscall()
-#include <sys/syscall.h>  // For syscall codes
+#include <sys/syscall.h> // For syscall codes
+#include <unistd.h>      // For syscall()
 #endif
 
 namespace {
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 0b54b5dfdbc5..caf1a0a658de 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -23,7 +23,7 @@
 
 
 DynamicLibrary::HandleSet::~HandleSet() {
-  for (void *Handle : Handles)
+  for (void *Handle : llvm::reverse(Handles))
     FreeLibrary(HMODULE(Handle));
 
   // 'Process' should not be released on Windows.
diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h
index c358b99ab96a..d4599dca044e 100644
--- a/lib/Support/Windows/WindowsSupport.h
+++ b/lib/Support/Windows/WindowsSupport.h
@@ -45,7 +45,9 @@
 #include <string>
 #include <system_error>
 #include <windows.h>
-#include <wincrypt.h> // Must be included after windows.h
+
+// Must be included after windows.h
+#include <wincrypt.h>
 
 /// Determines if the program is running on Windows 8 or newer. This
 /// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index f1496393e55e..01ae3214453d 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/YAMLParser.h"
+#include "llvm/ADT/AllocatorList.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/ADT/AllocatorList.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp
index 0c83da65e19e..7e510f0c2fdc 100644
--- a/lib/TableGen/StringMatcher.cpp
+++ b/lib/TableGen/StringMatcher.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/TableGen/StringMatcher.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/StringMatcher.h"
 #include <cassert>
 #include <map>
 #include <string>
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 981fd22c213c..5ce57926cc03 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,13 +12,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "AArch64.h"
 #include "AArch64MCInstLower.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCExpr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -35,11 +35,11 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 30e2b2310456..544f67433fd5 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -20,8 +20,8 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-dead-defs"
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 33fec74998d6..160107cd7e2b 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -14,9 +14,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "AArch64InstrInfo.h"
 #include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 9ac7ecb9cdb4..e8fcf1a0e9b7 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2827,7 +2827,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
     return false;
 
   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
-  if (SrcVT == MVT::f128)
+  if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
     return false;
 
   unsigned Opc;
@@ -2854,6 +2854,10 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
   MVT DestVT;
   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
     return false;
+  // Let regular ISEL handle FP16
+  if (DestVT == MVT::f16)
+    return false;
+
   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
          "Unexpected value type.");
 
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b18fb30eb2d4..8c2c0a564c30 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2566,7 +2566,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
   // pstatefield for the MSR (immediate) instruction, we also require that an
   // immediate value has been provided as an argument, we know that this is
   // the case as it has been ensured by semantic checking.
-  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());;
+  auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
   if (PMapper) {
     assert (isa<ConstantSDNode>(N->getOperand(2))
               && "Expected a constant integer expression.");
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index f798010906cc..059556a560c0 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ISelLowering.h"
 #include "AArch64CallingConvention.h"
 #include "AArch64MachineFunctionInfo.h"
-#include "AArch64ISelLowering.h"
 #include "AArch64PerfectShuffle.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
@@ -22,9 +22,9 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
@@ -51,10 +51,10 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/OperandTraits.h"
 #include "llvm/IR/Type.h"
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index faf39be9b41e..eea012382150 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -17,8 +17,8 @@
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 6e6daf812295..01196817f311 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,12 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Target/TargetOpcodes.h"
 
 using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 976498aa70d6..9243eb91cc1a 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -16,10 +16,10 @@
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index 038162c6f54a..fe4ef4b40ece 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -17,8 +17,8 @@
 
 #define DEBUG_TYPE "aarch64-pbqp"
 
-#include "AArch64.h"
 #include "AArch64PBQPRegAlloc.h"
+#include "AArch64.h"
 #include "AArch64RegisterInfo.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 789270c2a34b..9b3899e0681c 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -15,13 +15,13 @@
 #include "AArch64RegisterBankInfo.h"
 #include "AArch64InstrInfo.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LowLevelType.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetOpcodes.h"
 #include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 12a2e9a867f0..4bc2c060a068 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -9,12 +9,12 @@
 
 #include "AArch64TargetObjectFile.h"
 #include "AArch64TargetMachine.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/Dwarf.h"
 using namespace llvm;
 using namespace dwarf;
 
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 43569af04347..a4328682b93c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -9,8 +9,8 @@
 
 #include "AArch64TargetTransformInfo.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 449d732a8d44..e841fb894519 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -15,8 +15,8 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index ebf05ae303dd..43a6fa9ce089 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -11,8 +11,9 @@
 #include "AArch64RegisterInfo.h"
 #include "MCTargetDesc/AArch64FixupKinds.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAssembler.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCELFObjectWriter.h"
@@ -22,7 +23,6 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 10e7241da709..f7dda92fb551 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -15,11 +15,11 @@
 #include "MCTargetDesc/AArch64FixupKinds.h"
 #include "MCTargetDesc/AArch64MCExpr.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 271263507ae1..031aa8b81e35 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
@@ -30,7 +31,6 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 3d296ba4806b..19b2576f6895 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -10,6 +10,7 @@
 #include "MCTargetDesc/AArch64FixupKinds.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
@@ -23,7 +24,6 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index b50e8d1d659e..6ab2b9ef0459 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -447,6 +447,16 @@ class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
   Implies
 >;
 
+def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
+  [FeatureSouthernIslands,
+   FeatureFastFMAF32, 
+   HalfRate64Ops,
+   FeatureLDSBankCount32]>;
+
+def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
+  [FeatureSouthernIslands,
+   FeatureLDSBankCount32]>;
+   
 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
   [FeatureSeaIslands,
    FeatureLDSBankCount32]>;
@@ -461,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
   [FeatureSeaIslands,
    FeatureLDSBankCount16]>;
 
+def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
+  [FeatureSeaIslands,
+   FeatureLDSBankCount16]>;
+
 def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
   [FeatureVolcanicIslands,
    FeatureLDSBankCount32,
@@ -489,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
    FeatureLDSBankCount16,
    FeatureXNACK]>;
 
-def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
-def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
+def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
+  [FeatureGFX9,
+   FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,
+  [FeatureGFX9,
+   FeatureLDSBankCount32,
+   FeatureXNACK]>;
+
+def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
+  [FeatureGFX9,
+   FeatureLDSBankCount32]>;
+
+def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3,
+  [FeatureGFX9,
+   FeatureLDSBankCount32,
+   FeatureXNACK]>;
 
 //===----------------------------------------------------------------------===//
 // Debugger related subtarget features.
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 3c99f48e818a..faa424eb0a64 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,15 +10,15 @@
 /// This is the AMGPU address space based alias analysis pass.
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 91b3649f5c39..3c788fa1dcea 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -19,8 +19,8 @@
 #include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0959014812d8..83ad1a5c6ee3 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,25 +17,25 @@
 //
 
 #include "AMDGPUAsmPrinter.h"
-#include "AMDGPUTargetMachine.h"
-#include "MCTargetDesc/AMDGPUTargetStreamer.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
-#include "Utils/AMDGPUBaseInfo.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "R600Defines.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
 #include "SIDefines.h"
-#include "SIMachineFunctionInfo.h"
 #include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index e5adeeb465e1..0a58ce06704d 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
 
-#include "AMDKernelCodeT.h"
 #include "AMDGPU.h"
+#include "AMDKernelCodeT.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include <cstddef>
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index e67ae092fdda..515cc07dd449 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -18,8 +18,8 @@
 #include "AMDGPUISelLowering.h"
 #include "AMDGPUSubtarget.h"
 #include "SIISelLowering.h"
-#include "SIRegisterInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index d923cb117c12..b312dbc8d14d 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -25,13 +25,13 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 19fce064783d..251c2f9bb25a 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -13,15 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPURegisterInfo.h"
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
 #include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
 #include "SIISelLowering.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 723e8a7b54e2..5586b513b5fc 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -21,6 +21,7 @@
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600MachineFunctionInfo.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +31,6 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/Support/KnownBits.h"
-#include "SIInstrInfo.h"
 using namespace llvm;
 
 static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 12caa5118342..41cc7d7093ec 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -17,8 +17,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
 
 #include "AMDGPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "AMDGPUGenInstrInfo.inc"
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c87102e55dfb..ef845f44d365 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -15,9 +15,9 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
 
 #include "AMDGPU.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 267f4807a788..b889788c3426 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -14,10 +14,10 @@
 
 #include "AMDGPULegalizerInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOpcodes.h"
 
 using namespace llvm;
 
@@ -47,12 +47,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
   setAction({G_GEP, P2}, Legal);
   setAction({G_GEP, 1, S64}, Legal);
 
+  setAction({G_ICMP, S1}, Legal);
+  setAction({G_ICMP, 1, S32}, Legal);
+
   setAction({G_LOAD, P1}, Legal);
   setAction({G_LOAD, P2}, Legal);
   setAction({G_LOAD, S32}, Legal);
   setAction({G_LOAD, 1, P1}, Legal);
   setAction({G_LOAD, 1, P2}, Legal);
 
+  setAction({G_SELECT, S32}, Legal);
+  setAction({G_SELECT, 1, S1}, Legal);
+
   setAction({G_STORE, S32}, Legal);
   setAction({G_STORE, 1, P1}, Legal);
 
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index f1ef6281c90f..63dd0d726d91 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -38,7 +38,6 @@ using namespace llvm;
 
 #include "AMDGPUGenMCPseudoLowering.inc"
 
-
 AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st,
                                      const AsmPrinter &ap):
   Ctx(ctx), ST(st), AP(ap) { }
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 6d2785ba1c60..2071b6f157cd 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "SIInstrInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 8bfeb67ad4ec..99bb61b21db0 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
 
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 07f92918a43f..625c9b77e2de 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -33,11 +33,11 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -319,15 +319,17 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
   switch (Inst->getOpcode()) {
   case Instruction::Load: {
     LoadInst *LI = cast<LoadInst>(Inst);
-    return !LI->isVolatile();
+    // Only loads whose pointer operand is a GEP are currently handled.
+    return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
   }
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
     return true;
   case Instruction::Store: {
-    // Must be the stored pointer operand, not a stored value.
+    // Must be the stored pointer operand, not a stored value. Since it
+    // should be in canonical form, the User should also be a GEP.
     StoreInst *SI = cast<StoreInst>(Inst);
-    return (SI->getPointerOperand() == User) && !SI->isVolatile();
+    return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && !SI->isVolatile();
   }
   default:
     return false;
@@ -341,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
 
   // FIXME: There is no reason why we can't support larger arrays, we
   // are just being conservative for now.
+  // FIXME: We also reject allocas of the form [ 2 x [ 2 x i32 ]] or equivalent. These could
+  // potentially be promoted too, but we don't currently handle this case.
   if (!AllocaTy ||
       AllocaTy->getElementType()->isVectorTy() ||
+      AllocaTy->getElementType()->isArrayTy() ||
       AllocaTy->getNumElements() > 4 ||
       AllocaTy->getNumElements() < 2) {
     DEBUG(dbgs() << "  Cannot convert type to vector\n");
@@ -390,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
     switch (Inst->getOpcode()) {
     case Instruction::Load: {
       Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
-      Value *Ptr = Inst->getOperand(0);
+      Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
 
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
@@ -403,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
     case Instruction::Store: {
       Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
 
-      Value *Ptr = Inst->getOperand(1);
+      StoreInst *SI = cast<StoreInst>(Inst);
+      Value *Ptr = SI->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
       Value *VecValue = Builder.CreateLoad(BitCast);
       Value *NewVecValue = Builder.CreateInsertElement(VecValue,
-                                                       Inst->getOperand(0),
+                                                       SI->getValueOperand(),
                                                        Index);
       Builder.CreateStore(NewVecValue, BitCast);
       Inst->eraseFromParent();
diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
new file mode 100644
index 000000000000..36d88f52910d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
@@ -0,0 +1,353 @@
+//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===//
+
+#ifdef AMDGPU_REG_ASM_NAMES
+
+static const char *const VGPR32RegNames[] = {
+    "v0",   "v1",   "v2",   "v3",   "v4",   "v5",   "v6",   "v7",   "v8",
+    "v9",   "v10",  "v11",  "v12",  "v13",  "v14",  "v15",  "v16",  "v17",
+    "v18",  "v19",  "v20",  "v21",  "v22",  "v23",  "v24",  "v25",  "v26",
+    "v27",  "v28",  "v29",  "v30",  "v31",  "v32",  "v33",  "v34",  "v35",
+    "v36",  "v37",  "v38",  "v39",  "v40",  "v41",  "v42",  "v43",  "v44",
+    "v45",  "v46",  "v47",  "v48",  "v49",  "v50",  "v51",  "v52",  "v53",
+    "v54",  "v55",  "v56",  "v57",  "v58",  "v59",  "v60",  "v61",  "v62",
+    "v63",  "v64",  "v65",  "v66",  "v67",  "v68",  "v69",  "v70",  "v71",
+    "v72",  "v73",  "v74",  "v75",  "v76",  "v77",  "v78",  "v79",  "v80",
+    "v81",  "v82",  "v83",  "v84",  "v85",  "v86",  "v87",  "v88",  "v89",
+    "v90",  "v91",  "v92",  "v93",  "v94",  "v95",  "v96",  "v97",  "v98",
+    "v99",  "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
+    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
+    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
+    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
+    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
+    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
+    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
+    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
+    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
+    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
+    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
+    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
+    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
+    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
+    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
+    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
+    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
+    "v252", "v253", "v254", "v255"
+};
+
+static const char *const SGPR32RegNames[] = {
+    "s0",   "s1",   "s2",   "s3",   "s4",  "s5",  "s6",  "s7",  "s8",  "s9",
+    "s10",  "s11",  "s12",  "s13",  "s14", "s15", "s16", "s17", "s18", "s19",
+    "s20",  "s21",  "s22",  "s23",  "s24", "s25", "s26", "s27", "s28", "s29",
+    "s30",  "s31",  "s32",  "s33",  "s34", "s35", "s36", "s37", "s38", "s39",
+    "s40",  "s41",  "s42",  "s43",  "s44", "s45", "s46", "s47", "s48", "s49",
+    "s50",  "s51",  "s52",  "s53",  "s54", "s55", "s56", "s57", "s58", "s59",
+    "s60",  "s61",  "s62",  "s63",  "s64", "s65", "s66", "s67", "s68", "s69",
+    "s70",  "s71",  "s72",  "s73",  "s74", "s75", "s76", "s77", "s78", "s79",
+    "s80",  "s81",  "s82",  "s83",  "s84", "s85", "s86", "s87", "s88", "s89",
+    "s90",  "s91",  "s92",  "s93",  "s94", "s95", "s96", "s97", "s98", "s99",
+    "s100", "s101", "s102", "s103"
+};
+
+static const char *const VGPR64RegNames[] = {
+    "v[0:1]",     "v[1:2]",     "v[2:3]",     "v[3:4]",     "v[4:5]",
+    "v[5:6]",     "v[6:7]",     "v[7:8]",     "v[8:9]",     "v[9:10]",
+    "v[10:11]",   "v[11:12]",   "v[12:13]",   "v[13:14]",   "v[14:15]",
+    "v[15:16]",   "v[16:17]",   "v[17:18]",   "v[18:19]",   "v[19:20]",
+    "v[20:21]",   "v[21:22]",   "v[22:23]",   "v[23:24]",   "v[24:25]",
+    "v[25:26]",   "v[26:27]",   "v[27:28]",   "v[28:29]",   "v[29:30]",
+    "v[30:31]",   "v[31:32]",   "v[32:33]",   "v[33:34]",   "v[34:35]",
+    "v[35:36]",   "v[36:37]",   "v[37:38]",   "v[38:39]",   "v[39:40]",
+    "v[40:41]",   "v[41:42]",   "v[42:43]",   "v[43:44]",   "v[44:45]",
+    "v[45:46]",   "v[46:47]",   "v[47:48]",   "v[48:49]",   "v[49:50]",
+    "v[50:51]",   "v[51:52]",   "v[52:53]",   "v[53:54]",   "v[54:55]",
+    "v[55:56]",   "v[56:57]",   "v[57:58]",   "v[58:59]",   "v[59:60]",
+    "v[60:61]",   "v[61:62]",   "v[62:63]",   "v[63:64]",   "v[64:65]",
+    "v[65:66]",   "v[66:67]",   "v[67:68]",   "v[68:69]",   "v[69:70]",
+    "v[70:71]",   "v[71:72]",   "v[72:73]",   "v[73:74]",   "v[74:75]",
+    "v[75:76]",   "v[76:77]",   "v[77:78]",   "v[78:79]",   "v[79:80]",
+    "v[80:81]",   "v[81:82]",   "v[82:83]",   "v[83:84]",   "v[84:85]",
+    "v[85:86]",   "v[86:87]",   "v[87:88]",   "v[88:89]",   "v[89:90]",
+    "v[90:91]",   "v[91:92]",   "v[92:93]",   "v[93:94]",   "v[94:95]",
+    "v[95:96]",   "v[96:97]",   "v[97:98]",   "v[98:99]",   "v[99:100]",
+    "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]",
+    "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]",
+    "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]",
+    "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]",
+    "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]",
+    "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]",
+    "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]",
+    "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]",
+    "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]",
+    "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]",
+    "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]",
+    "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]",
+    "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]",
+    "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]",
+    "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]",
+    "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]",
+    "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]",
+    "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]",
+    "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]",
+    "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]",
+    "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]",
+    "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]",
+    "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]",
+    "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]",
+    "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]",
+    "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]",
+    "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]",
+    "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]",
+    "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]",
+    "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]",
+    "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]"
+};
+
+static const char *const VGPR96RegNames[] = {
+    "v[0:2]",     "v[1:3]",     "v[2:4]",     "v[3:5]",     "v[4:6]",
+    "v[5:7]",     "v[6:8]",     "v[7:9]",     "v[8:10]",    "v[9:11]",
+    "v[10:12]",   "v[11:13]",   "v[12:14]",   "v[13:15]",   "v[14:16]",
+    "v[15:17]",   "v[16:18]",   "v[17:19]",   "v[18:20]",   "v[19:21]",
+    "v[20:22]",   "v[21:23]",   "v[22:24]",   "v[23:25]",   "v[24:26]",
+    "v[25:27]",   "v[26:28]",   "v[27:29]",   "v[28:30]",   "v[29:31]",
+    "v[30:32]",   "v[31:33]",   "v[32:34]",   "v[33:35]",   "v[34:36]",
+    "v[35:37]",   "v[36:38]",   "v[37:39]",   "v[38:40]",   "v[39:41]",
+    "v[40:42]",   "v[41:43]",   "v[42:44]",   "v[43:45]",   "v[44:46]",
+    "v[45:47]",   "v[46:48]",   "v[47:49]",   "v[48:50]",   "v[49:51]",
+    "v[50:52]",   "v[51:53]",   "v[52:54]",   "v[53:55]",   "v[54:56]",
+    "v[55:57]",   "v[56:58]",   "v[57:59]",   "v[58:60]",   "v[59:61]",
+    "v[60:62]",   "v[61:63]",   "v[62:64]",   "v[63:65]",   "v[64:66]",
+    "v[65:67]",   "v[66:68]",   "v[67:69]",   "v[68:70]",   "v[69:71]",
+    "v[70:72]",   "v[71:73]",   "v[72:74]",   "v[73:75]",   "v[74:76]",
+    "v[75:77]",   "v[76:78]",   "v[77:79]",   "v[78:80]",   "v[79:81]",
+    "v[80:82]",   "v[81:83]",   "v[82:84]",   "v[83:85]",   "v[84:86]",
+    "v[85:87]",   "v[86:88]",   "v[87:89]",   "v[88:90]",   "v[89:91]",
+    "v[90:92]",   "v[91:93]",   "v[92:94]",   "v[93:95]",   "v[94:96]",
+    "v[95:97]",   "v[96:98]",   "v[97:99]",   "v[98:100]",  "v[99:101]",
+    "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]",
+    "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]",
+    "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]",
+    "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]",
+    "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]",
+    "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]",
+    "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]",
+    "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]",
+    "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]",
+    "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]",
+    "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]",
+    "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]",
+    "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]",
+    "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]",
+    "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]",
+    "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]",
+    "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]",
+    "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]",
+    "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]",
+    "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]",
+    "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]",
+    "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]",
+    "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]",
+    "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]",
+    "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]",
+    "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]",
+    "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]",
+    "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]",
+    "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]",
+    "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]",
+    "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]"
+};
+
+static const char *const VGPR128RegNames[] = {
+    "v[0:3]",     "v[1:4]",     "v[2:5]",     "v[3:6]",     "v[4:7]",
+    "v[5:8]",     "v[6:9]",     "v[7:10]",    "v[8:11]",    "v[9:12]",
+    "v[10:13]",   "v[11:14]",   "v[12:15]",   "v[13:16]",   "v[14:17]",
+    "v[15:18]",   "v[16:19]",   "v[17:20]",   "v[18:21]",   "v[19:22]",
+    "v[20:23]",   "v[21:24]",   "v[22:25]",   "v[23:26]",   "v[24:27]",
+    "v[25:28]",   "v[26:29]",   "v[27:30]",   "v[28:31]",   "v[29:32]",
+    "v[30:33]",   "v[31:34]",   "v[32:35]",   "v[33:36]",   "v[34:37]",
+    "v[35:38]",   "v[36:39]",   "v[37:40]",   "v[38:41]",   "v[39:42]",
+    "v[40:43]",   "v[41:44]",   "v[42:45]",   "v[43:46]",   "v[44:47]",
+    "v[45:48]",   "v[46:49]",   "v[47:50]",   "v[48:51]",   "v[49:52]",
+    "v[50:53]",   "v[51:54]",   "v[52:55]",   "v[53:56]",   "v[54:57]",
+    "v[55:58]",   "v[56:59]",   "v[57:60]",   "v[58:61]",   "v[59:62]",
+    "v[60:63]",   "v[61:64]",   "v[62:65]",   "v[63:66]",   "v[64:67]",
+    "v[65:68]",   "v[66:69]",   "v[67:70]",   "v[68:71]",   "v[69:72]",
+    "v[70:73]",   "v[71:74]",   "v[72:75]",   "v[73:76]",   "v[74:77]",
+    "v[75:78]",   "v[76:79]",   "v[77:80]",   "v[78:81]",   "v[79:82]",
+    "v[80:83]",   "v[81:84]",   "v[82:85]",   "v[83:86]",   "v[84:87]",
+    "v[85:88]",   "v[86:89]",   "v[87:90]",   "v[88:91]",   "v[89:92]",
+    "v[90:93]",   "v[91:94]",   "v[92:95]",   "v[93:96]",   "v[94:97]",
+    "v[95:98]",   "v[96:99]",   "v[97:100]",  "v[98:101]",  "v[99:102]",
+    "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]",
+    "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]",
+    "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]",
+    "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]",
+    "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]",
+    "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]",
+    "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]",
+    "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]",
+    "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]",
+    "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]",
+    "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]",
+    "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]",
+    "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]",
+    "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]",
+    "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]",
+    "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]",
+    "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]",
+    "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]",
+    "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]",
+    "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]",
+    "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]",
+    "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]",
+    "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]",
+    "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]",
+    "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]",
+    "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]",
+    "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]",
+    "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]",
+    "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]",
+    "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]",
+    "v[250:253]", "v[251:254]", "v[252:255]"
+};
+
+static const char *const VGPR256RegNames[] = {
+    "v[0:7]",     "v[1:8]",     "v[2:9]",     "v[3:10]",    "v[4:11]",
+    "v[5:12]",    "v[6:13]",    "v[7:14]",    "v[8:15]",    "v[9:16]",
+    "v[10:17]",   "v[11:18]",   "v[12:19]",   "v[13:20]",   "v[14:21]",
+    "v[15:22]",   "v[16:23]",   "v[17:24]",   "v[18:25]",   "v[19:26]",
+    "v[20:27]",   "v[21:28]",   "v[22:29]",   "v[23:30]",   "v[24:31]",
+    "v[25:32]",   "v[26:33]",   "v[27:34]",   "v[28:35]",   "v[29:36]",
+    "v[30:37]",   "v[31:38]",   "v[32:39]",   "v[33:40]",   "v[34:41]",
+    "v[35:42]",   "v[36:43]",   "v[37:44]",   "v[38:45]",   "v[39:46]",
+    "v[40:47]",   "v[41:48]",   "v[42:49]",   "v[43:50]",   "v[44:51]",
+    "v[45:52]",   "v[46:53]",   "v[47:54]",   "v[48:55]",   "v[49:56]",
+    "v[50:57]",   "v[51:58]",   "v[52:59]",   "v[53:60]",   "v[54:61]",
+    "v[55:62]",   "v[56:63]",   "v[57:64]",   "v[58:65]",   "v[59:66]",
+    "v[60:67]",   "v[61:68]",   "v[62:69]",   "v[63:70]",   "v[64:71]",
+    "v[65:72]",   "v[66:73]",   "v[67:74]",   "v[68:75]",   "v[69:76]",
+    "v[70:77]",   "v[71:78]",   "v[72:79]",   "v[73:80]",   "v[74:81]",
+    "v[75:82]",   "v[76:83]",   "v[77:84]",   "v[78:85]",   "v[79:86]",
+    "v[80:87]",   "v[81:88]",   "v[82:89]",   "v[83:90]",   "v[84:91]",
+    "v[85:92]",   "v[86:93]",   "v[87:94]",   "v[88:95]",   "v[89:96]",
+    "v[90:97]",   "v[91:98]",   "v[92:99]",   "v[93:100]",  "v[94:101]",
+    "v[95:102]",  "v[96:103]",  "v[97:104]",  "v[98:105]",  "v[99:106]",
+    "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]",
+    "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]",
+    "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]",
+    "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]",
+    "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]",
+    "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]",
+    "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]",
+    "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]",
+    "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]",
+    "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]",
+    "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]",
+    "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]",
+    "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]",
+    "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]",
+    "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]",
+    "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]",
+    "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]",
+    "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]",
+    "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]",
+    "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]",
+    "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]",
+    "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]",
+    "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]",
+    "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]",
+    "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]",
+    "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]",
+    "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]",
+    "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]",
+    "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]",
+    "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]"
+};
+
+static const char *const VGPR512RegNames[] = {
+    "v[0:15]",    "v[1:16]",    "v[2:17]",    "v[3:18]",    "v[4:19]",
+    "v[5:20]",    "v[6:21]",    "v[7:22]",    "v[8:23]",    "v[9:24]",
+    "v[10:25]",   "v[11:26]",   "v[12:27]",   "v[13:28]",   "v[14:29]",
+    "v[15:30]",   "v[16:31]",   "v[17:32]",   "v[18:33]",   "v[19:34]",
+    "v[20:35]",   "v[21:36]",   "v[22:37]",   "v[23:38]",   "v[24:39]",
+    "v[25:40]",   "v[26:41]",   "v[27:42]",   "v[28:43]",   "v[29:44]",
+    "v[30:45]",   "v[31:46]",   "v[32:47]",   "v[33:48]",   "v[34:49]",
+    "v[35:50]",   "v[36:51]",   "v[37:52]",   "v[38:53]",   "v[39:54]",
+    "v[40:55]",   "v[41:56]",   "v[42:57]",   "v[43:58]",   "v[44:59]",
+    "v[45:60]",   "v[46:61]",   "v[47:62]",   "v[48:63]",   "v[49:64]",
+    "v[50:65]",   "v[51:66]",   "v[52:67]",   "v[53:68]",   "v[54:69]",
+    "v[55:70]",   "v[56:71]",   "v[57:72]",   "v[58:73]",   "v[59:74]",
+    "v[60:75]",   "v[61:76]",   "v[62:77]",   "v[63:78]",   "v[64:79]",
+    "v[65:80]",   "v[66:81]",   "v[67:82]",   "v[68:83]",   "v[69:84]",
+    "v[70:85]",   "v[71:86]",   "v[72:87]",   "v[73:88]",   "v[74:89]",
+    "v[75:90]",   "v[76:91]",   "v[77:92]",   "v[78:93]",   "v[79:94]",
+    "v[80:95]",   "v[81:96]",   "v[82:97]",   "v[83:98]",   "v[84:99]",
+    "v[85:100]",  "v[86:101]",  "v[87:102]",  "v[88:103]",  "v[89:104]",
+    "v[90:105]",  "v[91:106]",  "v[92:107]",  "v[93:108]",  "v[94:109]",
+    "v[95:110]",  "v[96:111]",  "v[97:112]",  "v[98:113]",  "v[99:114]",
+    "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]",
+    "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]",
+    "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]",
+    "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]",
+    "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]",
+    "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]",
+    "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]",
+    "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]",
+    "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]",
+    "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]",
+    "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]",
+    "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]",
+    "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]",
+    "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]",
+    "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]",
+    "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]",
+    "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]",
+    "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]",
+    "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]",
+    "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]",
+    "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]",
+    "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]",
+    "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]",
+    "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]",
+    "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]",
+    "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]",
+    "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]",
+    "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]",
+    "v[240:255]"
+};
+
+static const char *const SGPR64RegNames[] = {
+    "s[0:1]",   "s[2:3]",   "s[4:5]",     "s[6:7]",     "s[8:9]",   "s[10:11]",
+    "s[12:13]", "s[14:15]", "s[16:17]",   "s[18:19]",   "s[20:21]", "s[22:23]",
+    "s[24:25]", "s[26:27]", "s[28:29]",   "s[30:31]",   "s[32:33]", "s[34:35]",
+    "s[36:37]", "s[38:39]", "s[40:41]",   "s[42:43]",   "s[44:45]", "s[46:47]",
+    "s[48:49]", "s[50:51]", "s[52:53]",   "s[54:55]",   "s[56:57]", "s[58:59]",
+    "s[60:61]", "s[62:63]", "s[64:65]",   "s[66:67]",   "s[68:69]", "s[70:71]",
+    "s[72:73]", "s[74:75]", "s[76:77]",   "s[78:79]",   "s[80:81]", "s[82:83]",
+    "s[84:85]", "s[86:87]", "s[88:89]",   "s[90:91]",   "s[92:93]", "s[94:95]",
+    "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]"
+};
+
+static const char *const SGPR128RegNames[] = {
+    "s[0:3]",   "s[4:7]",     "s[8:11]",  "s[12:15]", "s[16:19]", "s[20:23]",
+    "s[24:27]", "s[28:31]",   "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]",
+    "s[48:51]", "s[52:55]",   "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]",
+    "s[72:75]", "s[76:79]",   "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]",
+    "s[96:99]", "s[100:103]"
+};
+
+static const char *const SGPR256RegNames[] = {
+    "s[0:7]",   "s[4:11]",  "s[8:15]",  "s[12:19]", "s[16:23]",
+    "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]",
+    "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]",
+    "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]",
+    "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]"
+};
+
+static const char *const SGPR512RegNames[] = {
+    "s[0:15]",  "s[4:19]",  "s[8:23]",  "s[12:27]", "s[16:31]",  "s[20:35]",
+    "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]",  "s[44:59]",
+    "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]",  "s[68:83]",
+    "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]"
+};
+
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 7c198a1b8a3f..201fdc1974c6 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -36,7 +36,6 @@ protected:
 
 #define GET_TARGET_REGBANK_CLASS
 #include "AMDGPUGenRegisterBank.inc"
-
 };
 class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
   const SIRegisterInfo *TRI;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index b2867fcc49f9..ff58aa5741a1 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -40,7 +40,6 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
 #define GET_REGINFO_TARGET_DESC
 #include "AMDGPUGenRegisterInfo.inc"
 
-
 // Forced to be here by one .inc
 const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
   const MachineFunction *MF) const {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index ed9cbb994fad..5f4f20316a6b 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -16,12 +16,12 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
 
 #include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600ISelLowering.h"
 #include "R600FrameLowering.h"
-#include "SIInstrInfo.h"
-#include "SIISelLowering.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
 #include "SIFrameLowering.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/Triple.h"
@@ -57,9 +57,12 @@ public:
 
   enum {
     ISAVersion0_0_0,
+    ISAVersion6_0_0,
+    ISAVersion6_0_1,
     ISAVersion7_0_0,
     ISAVersion7_0_1,
     ISAVersion7_0_2,
+    ISAVersion7_0_3,
     ISAVersion8_0_0,
     ISAVersion8_0_1,
     ISAVersion8_0_2,
@@ -67,7 +70,9 @@ public:
     ISAVersion8_0_4,
     ISAVersion8_1_0,
     ISAVersion9_0_0,
-    ISAVersion9_0_1
+    ISAVersion9_0_1,
+    ISAVersion9_0_2,
+    ISAVersion9_0_3
   };
 
   enum TrapHandlerAbi {
@@ -787,7 +792,7 @@ public:
 
   /// \returns VGPR allocation granularity supported by the subtarget.
   unsigned getVGPRAllocGranule() const {
-    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
+    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
   }
 
   /// \returns VGPR encoding granularity supported by the subtarget.
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 404598ff4738..b644eba536fa 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -28,26 +28,26 @@
 #include "GCNSchedStrategy.h"
 #include "R600MachineScheduler.h"
 #include "SIMachineScheduler.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/AlwaysInliner.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Vectorize.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
 #include <memory>
 
 using namespace llvm;
@@ -734,7 +734,6 @@ void GCNPassConfig::addMachineSSAOptimization() {
   addPass(&SIFoldOperandsID);
   addPass(&DeadMachineInstructionElimID);
   addPass(&SILoadStoreOptimizerID);
-  addPass(createSIShrinkInstructionsPass());
   if (EnableSDWAPeephole) {
     addPass(&SIPeepholeSDWAID);
     addPass(&MachineLICMID);
@@ -742,6 +741,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
     addPass(&SIFoldOperandsID);
     addPass(&DeadMachineInstructionElimID);
   }
+  addPass(createSIShrinkInstructionsPass());
 }
 
 bool GCNPassConfig::addILPOpts() {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index c96761c0b04e..6c1885e67fcb 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPUTargetMachine.h"
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
-#include "Utils/AMDGPUBaseInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index beafebc1284a..dee3d2856701 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -20,8 +20,8 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/CostTable.h"
 #include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index cc68c971b249..16e3b7b4ebee 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -11,18 +11,19 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
 #include "SIDefines.h"
+#include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDKernelCodeTUtils.h"
-#include "Utils/AMDGPUAsmUtils.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -40,12 +41,11 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -814,14 +814,8 @@ private:
   bool ParseDirectiveCodeObjectMetadata();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
-  bool ParseSectionDirectiveHSAText();
   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
   bool ParseDirectiveAMDGPUHsaKernel();
-  bool ParseDirectiveAMDGPUHsaModuleGlobal();
-  bool ParseDirectiveAMDGPUHsaProgramGlobal();
-  bool ParseSectionDirectiveHSADataGlobalAgent();
-  bool ParseSectionDirectiveHSADataGlobalProgram();
-  bool ParseSectionDirectiveHSARodataReadonlyAgent();
   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                              RegisterKind RegKind, unsigned Reg1,
                              unsigned RegNum);
@@ -2365,12 +2359,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
   return false;
 }
 
-bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSATextSection(getContext()));
-  return false;
-}
-
 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
   if (getLexer().isNot(AsmToken::Identifier))
     return TokError("expected symbol name");
@@ -2384,46 +2372,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
   return false;
 }
 
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() {
-  if (getLexer().isNot(AsmToken::Identifier))
-    return TokError("expected symbol name");
-
-  StringRef GlobalName = Parser.getTok().getIdentifier();
-
-  getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName);
-  Lex();
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() {
-  if (getLexer().isNot(AsmToken::Identifier))
-    return TokError("expected symbol name");
-
-  StringRef GlobalName = Parser.getTok().getIdentifier();
-
-  getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName);
-  Lex();
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSADataGlobalAgentSection(getContext()));
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSADataGlobalProgramSection(getContext()));
-  return false;
-}
-
-bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() {
-  getParser().getStreamer().SwitchSection(
-      AMDGPU::getHSARodataReadonlyAgentSection(getContext()));
-  return false;
-}
-
 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getString();
 
@@ -2439,27 +2387,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
   if (IDVal == ".amd_kernel_code_t")
     return ParseDirectiveAMDKernelCodeT();
 
-  if (IDVal == ".hsatext")
-    return ParseSectionDirectiveHSAText();
-
   if (IDVal == ".amdgpu_hsa_kernel")
     return ParseDirectiveAMDGPUHsaKernel();
 
-  if (IDVal == ".amdgpu_hsa_module_global")
-    return ParseDirectiveAMDGPUHsaModuleGlobal();
-
-  if (IDVal == ".amdgpu_hsa_program_global")
-    return ParseDirectiveAMDGPUHsaProgramGlobal();
-
-  if (IDVal == ".hsadata_global_agent")
-    return ParseSectionDirectiveHSADataGlobalAgent();
-
-  if (IDVal == ".hsadata_global_program")
-    return ParseSectionDirectiveHSADataGlobalProgram();
-
-  if (IDVal == ".hsarodata_readonly_agent")
-    return ParseSectionDirectiveHSARodataReadonlyAgent();
-
   return true;
 }
 
@@ -2919,6 +2849,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
   if (getLexer().isNot(AsmToken::Integer))
     return true;
 
+  SMLoc ValLoc = Parser.getTok().getLoc();
   if (getParser().parseAbsoluteExpression(CntVal))
     return true;
 
@@ -2936,21 +2867,24 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
   }
 
-  // To improve diagnostics, do not skip delimiters on errors
-  if (!Failed) {
-    if (getLexer().isNot(AsmToken::RParen)) {
-      return true;
-    }
-    Parser.Lex();
-    if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
-      const AsmToken NextToken = getLexer().peekTok();
-      if (NextToken.is(AsmToken::Identifier)) {
-        Parser.Lex();
-      }
+  if (Failed) {
+    Error(ValLoc, "too large value for " + CntName);
+    return true;
+  }
+
+  if (getLexer().isNot(AsmToken::RParen)) {
+    return true;
+  }
+
+  Parser.Lex();
+  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
+    const AsmToken NextToken = getLexer().peekTok();
+    if (NextToken.is(AsmToken::Identifier)) {
+      Parser.Lex();
     }
   }
 
-  return Failed;
+  return false;
 }
 
 OperandMatchResultTy
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index cafce0164fa9..e30844f082cd 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUISelLowering.cpp
   AMDGPUInstrInfo.cpp
   AMDGPUPromoteAlloca.cpp
+  AMDGPURegAsmNames.inc.cpp
   AMDGPURegisterInfo.cpp
   AMDGPUUnifyDivergentExitNodes.cpp
   GCNHazardRecognizer.cpp
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 9b3cde7c4df6..88c92b9582fd 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -20,21 +20,20 @@
 #include "AMDGPUDisassembler.h"
 #include "AMDGPU.h"
 #include "AMDGPURegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/Endian.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/TargetRegistry.h"
 
-
 using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-disassembler"
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 0ff405a71e9b..5fa3cf1a223f 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -20,8 +20,8 @@
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
-#include <cstdint>
 #include <algorithm>
+#include <cstdint>
 #include <memory>
 
 namespace llvm {
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 80fc4ac9d2a3..cd9e7fb04f16 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPUSubtarget.h"
 #include "GCNHazardRecognizer.h"
+#include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
 #include "SIRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 523eea41897e..b84640230eee 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -9,8 +9,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUInstPrinter.h"
-#include "SIDefines.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
 #include "Utils/AMDGPUAsmUtils.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index f3266fe82955..0a9c2b94c1ee 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -8,8 +8,8 @@
 /// \file
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
index 647017d5061d..4e828a791e09 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -13,20 +13,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "AMDGPU.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::AMDGPU;
-using namespace llvm::AMDGPU::CodeObject;
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
@@ -37,192 +29,7 @@ static cl::opt<bool> VerifyCodeObjectMetadata(
     "amdgpu-verify-comd",
     cl::desc("Verify AMDGPU Code Object Metadata"));
 
-namespace yaml {
-
-template <>
-struct ScalarEnumerationTraits<AccessQualifier> {
-  static void enumeration(IO &YIO, AccessQualifier &EN) {
-    YIO.enumCase(EN, "Default", AccessQualifier::Default);
-    YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
-    YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
-    YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
-  }
-};
-
-template <>
-struct ScalarEnumerationTraits<AddressSpaceQualifier> {
-  static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
-    YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
-    YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
-    YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
-    YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
-    YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
-    YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
-  }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueKind> {
-  static void enumeration(IO &YIO, ValueKind &EN) {
-    YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
-    YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
-    YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
-    YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
-    YIO.enumCase(EN, "Image", ValueKind::Image);
-    YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
-    YIO.enumCase(EN, "Queue", ValueKind::Queue);
-    YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
-    YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
-    YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
-    YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
-    YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
-    YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
-    YIO.enumCase(EN, "HiddenCompletionAction",
-                 ValueKind::HiddenCompletionAction);
-  }
-};
-
-template <>
-struct ScalarEnumerationTraits<ValueType> {
-  static void enumeration(IO &YIO, ValueType &EN) {
-    YIO.enumCase(EN, "Struct", ValueType::Struct);
-    YIO.enumCase(EN, "I8", ValueType::I8);
-    YIO.enumCase(EN, "U8", ValueType::U8);
-    YIO.enumCase(EN, "I16", ValueType::I16);
-    YIO.enumCase(EN, "U16", ValueType::U16);
-    YIO.enumCase(EN, "F16", ValueType::F16);
-    YIO.enumCase(EN, "I32", ValueType::I32);
-    YIO.enumCase(EN, "U32", ValueType::U32);
-    YIO.enumCase(EN, "F32", ValueType::F32);
-    YIO.enumCase(EN, "I64", ValueType::I64);
-    YIO.enumCase(EN, "U64", ValueType::U64);
-    YIO.enumCase(EN, "F64", ValueType::F64);
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::Attrs::Metadata> {
-  static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
-    YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
-                    MD.mReqdWorkGroupSize, std::vector<uint32_t>());
-    YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
-                    MD.mWorkGroupSizeHint, std::vector<uint32_t>());
-    YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
-                    MD.mVecTypeHint, std::string());
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::Arg::Metadata> {
-  static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
-    YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
-    YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
-    YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind);
-    YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
-    YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
-                    uint32_t(0));
-    YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
-                    AccessQualifier::Unknown);
-    YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
-                    AddressSpaceQualifier::Unknown);
-    YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
-    YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
-    YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
-    YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
-    YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
-    YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::CodeProps::Metadata> {
-  static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
-    YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
-                    MD.mKernargSegmentSize, uint64_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
-                    MD.mWorkgroupGroupSegmentSize, uint32_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
-                    MD.mWorkitemPrivateSegmentSize, uint32_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
-                    MD.mWavefrontNumSGPRs, uint16_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
-                    MD.mWorkitemNumVGPRs, uint16_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
-                    MD.mKernargSegmentAlign, uint8_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
-                    MD.mGroupSegmentAlign, uint8_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
-                    MD.mPrivateSegmentAlign, uint8_t(0));
-    YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
-                    MD.mWavefrontSize, uint8_t(0));
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::DebugProps::Metadata> {
-  static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) {
-    YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
-                    MD.mDebuggerABIVersion, std::vector<uint32_t>());
-    YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
-                    MD.mReservedNumVGPRs, uint16_t(0));
-    YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
-                    MD.mReservedFirstVGPR, uint16_t(-1));
-    YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
-                    MD.mPrivateSegmentBufferSGPR, uint16_t(-1));
-    YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
-                    MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
-  }
-};
-
-template <>
-struct MappingTraits<Kernel::Metadata> {
-  static void mapping(IO &YIO, Kernel::Metadata &MD) {
-    YIO.mapRequired(Kernel::Key::Name, MD.mName);
-    YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
-    YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
-                    std::vector<uint32_t>());
-    if (!MD.mAttrs.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
-    if (!MD.mArgs.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
-    if (!MD.mCodeProps.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
-    if (!MD.mDebugProps.empty() || !YIO.outputting())
-      YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps);
-  }
-};
-
-template <>
-struct MappingTraits<CodeObject::Metadata> {
-  static void mapping(IO &YIO, CodeObject::Metadata &MD) {
-    YIO.mapRequired(Key::Version, MD.mVersion);
-    YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
-    if (!MD.mKernels.empty() || !YIO.outputting())
-      YIO.mapOptional(Key::Kernels, MD.mKernels);
-  }
-};
-
-} // end namespace yaml
-
 namespace AMDGPU {
-
-/* static */
-std::error_code CodeObject::Metadata::fromYamlString(
-    std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
-  yaml::Input YamlInput(YamlString);
-  YamlInput >> CodeObjectMetadata;
-  return YamlInput.error();
-}
-
-/* static */
-std::error_code CodeObject::Metadata::toYamlString(
-    CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
-  raw_string_ostream YamlStream(YamlString);
-  yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
-  YamlOutput << CodeObjectMetadata;
-  return std::error_code();
-}
-
 namespace CodeObject {
 
 void MetadataStreamer::dump(StringRef YamlString) const {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
index 8d4c51763f63..c6681431d74d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
@@ -17,9 +17,9 @@
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
 
 #include "AMDGPU.h"
-#include "AMDGPUCodeObjectMetadata.h"
 #include "AMDKernelCodeT.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
 #include "llvm/Support/ErrorOr.h"
 
 namespace llvm {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 073d19422e86..6abe7f3d37d5 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -8,12 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 8dc863f723e2..2a0032fc9adc 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,12 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUTargetStreamer.h"
+#include "AMDGPU.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "Utils/AMDKernelCodeTUtils.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Metadata.h"
@@ -25,7 +26,6 @@
 #include "llvm/MC/MCELFStreamer.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/FormattedStream.h"
 
 namespace llvm {
@@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
   }
 }
 
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
-    StringRef GlobalName) {
-  OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
-}
-
-void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
-    StringRef GlobalName) {
-  OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
-}
-
 bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
   auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
   if (!VerifiedYamlString)
@@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
   Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
 }
 
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
-    StringRef GlobalName) {
-
-  MCSymbolELF *Symbol = cast<MCSymbolELF>(
-      getStreamer().getContext().getOrCreateSymbol(GlobalName));
-  Symbol->setType(ELF::STT_OBJECT);
-  Symbol->setBinding(ELF::STB_LOCAL);
-}
-
-void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
-    StringRef GlobalName) {
-
-  MCSymbolELF *Symbol = cast<MCSymbolELF>(
-      getStreamer().getContext().getOrCreateSymbol(GlobalName));
-  Symbol->setType(ELF::STT_OBJECT);
-  Symbol->setBinding(ELF::STB_GLOBAL);
-}
-
 bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
   auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
   if (!VerifiedYamlString)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 5c588bbded9c..968128e94d0b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -44,10 +44,6 @@ public:
 
   virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
 
-  virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
-
-  virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
-
   virtual void EmitStartOfCodeObjectMetadata(const Module &Mod);
 
   virtual void EmitKernelCodeObjectMetadata(
@@ -74,10 +70,6 @@ public:
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
-  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
-  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
   /// \returns True on success, false on failure.
   bool EmitCodeObjectMetadata(StringRef YamlString) override;
 };
@@ -105,10 +97,6 @@ public:
 
   void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
 
-  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
-
-  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-
   /// \returns True on success, false on failure.
   bool EmitCodeObjectMetadata(StringRef YamlString) override;
 };
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 6015ec190fd4..eab90e1d344c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -14,10 +14,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "R600Defines.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "R600Defines.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixup.h"
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index 0e4eda982139..f6f2582aa11b 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -80,57 +80,71 @@ def : Proc<"cayman",     R600_VLIW4_Itin,
 // Southern Islands
 //===----------------------------------------------------------------------===//
 
-def : ProcessorModel<"SI", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"gfx600",     SIFullSpeedModel, 
+  [FeatureISAVersion6_0_0]>;
+
+def : ProcessorModel<"SI",         SIFullSpeedModel,
+  [FeatureISAVersion6_0_0]
 >;
 
-def : ProcessorModel<"tahiti", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
+def : ProcessorModel<"tahiti",     SIFullSpeedModel,
+  [FeatureISAVersion6_0_0]
 >;
 
-def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"gfx601",     SIQuarterSpeedModel,
+  [FeatureISAVersion6_0_1]
+>;
 
-def : ProcessorModel<"verde",    SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"pitcairn",   SIQuarterSpeedModel, 
+  [FeatureISAVersion6_0_1]>;
 
-def : ProcessorModel<"oland",    SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"verde",      SIQuarterSpeedModel,
+  [FeatureISAVersion6_0_1]>;
 
-def : ProcessorModel<"hainan",   SIQuarterSpeedModel, [FeatureSouthernIslands]>;
+def : ProcessorModel<"oland",      SIQuarterSpeedModel,
+  [FeatureISAVersion6_0_1]>;
+
+def : ProcessorModel<"hainan",     SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>;
 
 //===----------------------------------------------------------------------===//
 // Sea Islands
 //===----------------------------------------------------------------------===//
 
+def : ProcessorModel<"gfx700",     SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_0]
+>;
+
 def : ProcessorModel<"bonaire",    SIQuarterSpeedModel,
   [FeatureISAVersion7_0_0]
 >;
 
-def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_2]
->;
-
 def : ProcessorModel<"kaveri",     SIQuarterSpeedModel,
   [FeatureISAVersion7_0_0]
 >;
 
-def : ProcessorModel<"hawaii",     SIFullSpeedModel,
-  [FeatureISAVersion7_0_1]
->;
-
-def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_2]>;
-
-def : ProcessorModel<"gfx700",     SIQuarterSpeedModel,
-  [FeatureISAVersion7_0_0]
->;
-
 def : ProcessorModel<"gfx701",     SIFullSpeedModel,
   [FeatureISAVersion7_0_1]
 >;
 
+def : ProcessorModel<"hawaii",     SIFullSpeedModel,
+  [FeatureISAVersion7_0_1]
+>;
+
 def : ProcessorModel<"gfx702",     SIQuarterSpeedModel,
   [FeatureISAVersion7_0_2]
 >;
 
+def : ProcessorModel<"gfx703",     SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"kabini",     SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_3]
+>;
+
+def : ProcessorModel<"mullins",    SIQuarterSpeedModel,
+  [FeatureISAVersion7_0_3]>;
+
 //===----------------------------------------------------------------------===//
 // Volcanic Islands
 //===----------------------------------------------------------------------===//
@@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
   [FeatureISAVersion8_1_0]
 >;
 
-def : ProcessorModel<"gfx900",   SIQuarterSpeedModel,
-  [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
+//===----------------------------------------------------------------------===//
+// GFX9
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_0]
 >;
 
-def : ProcessorModel<"gfx901",   SIQuarterSpeedModel,
-  [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
+def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_1]
 >;
+
+def : ProcessorModel<"gfx902", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_2]
+>;
+
+def : ProcessorModel<"gfx903", SIQuarterSpeedModel,
+  [FeatureISAVersion9_0_3]
+>;
+
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 09b328765604..6993e8a62a9c 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -12,15 +12,14 @@
 /// computing their address on the fly ; it also sets STACK_SIZE info.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Debug.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +29,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 03fc1aff5ec1..0d8ccd088ec4 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -15,10 +15,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 5c30a0734f0d..66def2d29caf 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -15,11 +15,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600MachineFunctionInfo.h"
 #include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 1f01ad732e00..37787b3c5f72 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -10,8 +10,8 @@
 #include "R600FrameLowering.h"
 #include "AMDGPUSubtarget.h"
 #include "R600RegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Support/MathExtras.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 60b913cfd39a..c55878f8bff0 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
     Mask = DAG.getConstant(0xff, DL, MVT::i32);
   } else if (Store->getMemoryVT() == MVT::i16) {
     assert(Store->getAlignment() >= 2);
-    Mask = DAG.getConstant(0xffff, DL, MVT::i32);;
+    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
   } else {
     llvm_unreachable("Unsupported private trunc store");
   }
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 2422d57269eb..c5da5e404200 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "R600InstrInfo.h"
 #include "AMDGPU.h"
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "R600Defines.h"
 #include "R600FrameLowering.h"
-#include "R600InstrInfo.h"
 #include "R600RegisterInfo.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/BitVector.h"
@@ -35,8 +35,8 @@
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cassert>
-#include <cstring>
 #include <cstdint>
+#include <cstring>
 #include <iterator>
 #include <utility>
 #include <vector>
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index db18e5bd1afa..47fda1c8fa82 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "R600MachineScheduler.h"
-#include "R600InstrInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Pass.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index 3e957126b497..1cb40938cee7 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Debug.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "R600InstrInfo.h"
@@ -24,6 +23,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
index 62ebef8e91af..b5c439b21b89 100644
--- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
@@ -19,8 +19,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SIInstrInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 3cca815d8773..5f5f25103c02 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -65,10 +65,10 @@
 /// ultimately led to the creation of an illegal COPY.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/DenseSet.h"
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index dfac068d1f69..e10f1ed3762e 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -730,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
     // Make sure sources are identical.
     const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
     const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
-    if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() ||
+    if (!Src0->isReg() || !Src1->isReg() ||
+        Src0->getSubReg() != Src1->getSubReg() ||
         Src0->getSubReg() != AMDGPU::NoSubRegister)
       return nullptr;
 
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 97bb0f0c0656..b1bd14e421f0 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -8,10 +8,10 @@
 //==-----------------------------------------------------------------------===//
 
 #include "SIFrameLowering.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
 
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index b48b23911105..599ee942d738 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17,12 +17,12 @@
 #define _USE_MATH_DEFINES
 #endif
 
+#include "SIISelLowering.h"
 #include "AMDGPU.h"
 #include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUTargetMachine.h"
 #include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
 #include "SIDefines.h"
-#include "SIISelLowering.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
@@ -2604,7 +2604,7 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
-  return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
+  return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
 }
 
 SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c10badba88f3..0f009a48754a 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -229,7 +229,7 @@ public:
                      MachineInstr &MI);
 
   BlockWaitcntBrackets()
-      : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false),
+      : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false),
         LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) {
     for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
          T = (enum InstCounterType)(T + 1)) {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 36d29b8ecf06..58c05cf16f15 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -20,10 +20,10 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 35d3a93d8710..5f1c7f1fc42f 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -60,8 +60,8 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 3680e02da576..ba616ada0c9c 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -21,8 +21,8 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetMachine.h"
 
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 348bb4fa0260..9fdb8caac6f2 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -15,8 +15,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
 
 #include "AMDGPUMachineFunction.h"
-#include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 9d4e677400e6..bb17dbbdfbd6 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SIMachineScheduler.h"
 #include "AMDGPU.h"
 #include "SIInstrInfo.h"
-#include "SIMachineScheduler.h"
 #include "SIRegisterInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index fae249b04492..f4ddf1891683 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -20,13 +20,12 @@
 ///
 //===----------------------------------------------------------------------===//
 
-
 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
 #include "SIInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include <unordered_map>
@@ -129,7 +128,8 @@ public:
   bool getNeg() const { return Neg; }
   bool getSext() const { return Sext; }
 
-  uint64_t getSrcMods() const;
+  uint64_t getSrcMods(const SIInstrInfo *TII,
+                      const MachineOperand *SrcOp) const;
 };
 
 class SDWADstOperand : public SDWAOperand {
@@ -240,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg,
   return SuperMask.all();
 }
 
-uint64_t SDWASrcOperand::getSrcMods() const {
+uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
+                                    const MachineOperand *SrcOp) const {
   uint64_t Mods = 0;
+  const auto *MI = SrcOp->getParent();
+  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
+    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
+      Mods = Mod->getImm();
+    }
+  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
+    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
+      Mods = Mod->getImm();
+    }
+  }
   if (Abs || Neg) {
     assert(!Sext &&
            "Float and integer src modifiers can't be set simulteniously");
     Mods |= Abs ? SISrcMods::ABS : 0;
-    Mods |= Neg ? SISrcMods::NEG : 0;
+    Mods ^= Neg ? SISrcMods::NEG : 0;
   } else if (Sext) {
     Mods |= SISrcMods::SEXT;
   }
@@ -312,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   }
   copyRegOperand(*Src, *getTargetOperand());
   SrcSel->setImm(getSrcSel());
-  SrcMods->setImm(getSrcMods());
+  SrcMods->setImm(getSrcMods(TII, Src));
   getTargetOperand()->setIsKill(false);
   return true;
 }
@@ -409,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
       switch (Opcode) {
       case AMDGPU::V_LSHRREV_B32_e32:
       case AMDGPU::V_ASHRREV_I32_e32:
-      case AMDGPU::V_LSHLREV_B32_e32: {
+      case AMDGPU::V_LSHLREV_B32_e32:
+      case AMDGPU::V_LSHRREV_B32_e64:
+      case AMDGPU::V_ASHRREV_I32_e64:
+      case AMDGPU::V_LSHLREV_B32_e64: {
         // from: v_lshrrev_b32_e32 v1, 16/24, v0
         // to SDWA src:v0 src_sel:WORD_1/BYTE_3
 
@@ -432,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
             TRI->isPhysicalRegister(Dst->getReg()))
           break;
 
-        if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+        if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
+            Opcode == AMDGPU::V_LSHLREV_B32_e64) {
           auto SDWADst = make_unique<SDWADstOperand>(
               Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -441,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
         } else {
           auto SDWASrc = make_unique<SDWASrcOperand>(
               Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
-              Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
+              Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
+              Opcode != AMDGPU::V_LSHRREV_B32_e64);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
           SDWAOperands[&MI] = std::move(SDWASrc);
           ++NumSDWAPatternsFound;
@@ -451,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
 
       case AMDGPU::V_LSHRREV_B16_e32:
       case AMDGPU::V_ASHRREV_I16_e32:
-      case AMDGPU::V_LSHLREV_B16_e32: {
+      case AMDGPU::V_LSHLREV_B16_e32:
+      case AMDGPU::V_LSHRREV_B16_e64:
+      case AMDGPU::V_ASHRREV_I16_e64:
+      case AMDGPU::V_LSHLREV_B16_e64: {
         // from: v_lshrrev_b16_e32 v1, 8, v0
         // to SDWA src:v0 src_sel:BYTE_1
 
@@ -472,7 +491,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
             TRI->isPhysicalRegister(Dst->getReg()))
           break;
 
-        if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+        if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
+            Opcode == AMDGPU::V_LSHLREV_B16_e64) {
           auto SDWADst =
             make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
@@ -481,7 +501,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
         } else {
           auto SDWASrc = make_unique<SDWASrcOperand>(
               Src1, Dst, BYTE_1, false, false,
-              Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
+              Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
+              Opcode != AMDGPU::V_LSHRREV_B16_e64);
           DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
           SDWAOperands[&MI] = std::move(SDWASrc);
           ++NumSDWAPatternsFound;
@@ -549,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
         ++NumSDWAPatternsFound;
         break;
       }
-      case AMDGPU::V_AND_B32_e32: {
+      case AMDGPU::V_AND_B32_e32:
+      case AMDGPU::V_AND_B32_e64: {
         // e.g.:
         // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
         // to SDWA src:v0 src_sel:WORD_0/BYTE_0
 
         MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
-        auto Imm = foldToImm(*Src0);
-        if (!Imm)
-          break;
-
-        if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
-          break;
-
         MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+        auto ValSrc = Src1;
+        auto Imm = foldToImm(*Src0);
+
+        if (!Imm) {
+          Imm = foldToImm(*Src1);
+          ValSrc = Src0;
+        }
+
+        if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
+          break;
+
         MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
       
         if (TRI->isPhysicalRegister(Src1->getReg()) ||
@@ -570,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
           break;
 
         auto SDWASrc = make_unique<SDWASrcOperand>(
-            Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
+            ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
         DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
         SDWAOperands[&MI] = std::move(SDWASrc);
         ++NumSDWAPatternsFound;
@@ -583,28 +609,38 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
 
 bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
   // Check if this instruction has opcode that supports SDWA
-  return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
+  unsigned Opc = MI.getOpcode();
+  if (AMDGPU::getSDWAOp(Opc) != -1)
+    return true;
+  int Opc32 = AMDGPU::getVOPe32(Opc);
+  if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1)
+    return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+           !TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+  return false;
 }
 
 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                    const SDWAOperandsVector &SDWAOperands) {
   // Convert to sdwa
   int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
+  if (SDWAOpcode == -1)
+    SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
   assert(SDWAOpcode != -1);
 
+  // Copy dst, if it is present in original then should also be present in SDWA
+  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+  if (!Dst && !TII->isVOPC(MI))
+    return false;
+
   const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
 
   // Create SDWA version of instruction MI and initialize its operands
   MachineInstrBuilder SDWAInst =
     BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
 
-  // Copy dst, if it is present in original then should also be present in SDWA
-  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
   if (Dst) {
     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
     SDWAInst.add(*Dst);
-  } else {
-    assert(TII->isVOPC(MI));
   }
 
   // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     Src0 &&
     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
     AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
-  SDWAInst.addImm(0);
+  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
+    SDWAInst.addImm(Mod->getImm());
+  else
+    SDWAInst.addImm(0);
   SDWAInst.add(*Src0);
 
   // Copy src1 if present, initialize src1_modifiers.
@@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     assert(
       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
       AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
-    SDWAInst.addImm(0);
+    if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
+      SDWAInst.addImm(Mod->getImm());
+    else
+      SDWAInst.addImm(0);
     SDWAInst.add(*Src1);
-  } else {
-    assert(TII->isVOP1(MI));
   }
 
   if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
@@ -746,8 +786,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
   PotentialMatches.clear();
   SDWAOperands.clear();
 
+  bool Ret = !ConvertedInstructions.empty();
   while (!ConvertedInstructions.empty())
     legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
 
-  return false;
+  return Ret;
 }
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 6fb01a09fe13..b611f28fcabd 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SIRegisterInfo.h"
+#include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
-#include "AMDGPUSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
@@ -1104,6 +1104,66 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   }
 }
 
+StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
+  #define AMDGPU_REG_ASM_NAMES
+  #include "AMDGPURegAsmNames.inc.cpp"
+
+  #define REG_RANGE(BeginReg, EndReg, RegTable)            \
+    if (Reg >= BeginReg && Reg <= EndReg) {                \
+      unsigned Index = Reg - BeginReg;                     \
+      assert(Index < array_lengthof(RegTable));            \
+      return RegTable[Index];                              \
+    }
+
+  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
+  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
+  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
+  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
+            VGPR96RegNames);
+
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
+            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
+            VGPR128RegNames);
+  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
+            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
+            SGPR128RegNames);
+
+  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
+            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+            VGPR256RegNames);
+
+  REG_RANGE(
+    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
+    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
+    VGPR512RegNames);
+
+  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
+            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+            SGPR256RegNames);
+
+  REG_RANGE(
+    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
+    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
+    SGPR512RegNames
+  );
+
+#undef REG_RANGE
+
+  // FIXME: Rename flat_scr so we don't need to special case this.
+  switch (Reg) {
+  case AMDGPU::FLAT_SCR:
+    return "flat_scratch";
+  case AMDGPU::FLAT_SCR_LO:
+    return "flat_scratch_lo";
+  case AMDGPU::FLAT_SCR_HI:
+    return "flat_scratch_hi";
+  default:
+    // For the special named registers the default is fine.
+    return TargetRegisterInfo::getRegAsmName(Reg);
+  }
+}
+
 // FIXME: This is very slow. It might be worth creating a map from physreg to
 // register class.
 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index a648c178101a..8fed6d5f9710 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -16,8 +16,8 @@
 #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
 
 #include "AMDGPURegisterInfo.h"
-#include "SIDefines.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
 namespace llvm {
@@ -118,6 +118,8 @@ public:
   bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
                                           int FI, RegScavenger *RS) const;
 
+  StringRef getRegAsmName(unsigned Reg) const override;
+
   unsigned getHWRegIndex(unsigned Reg) const {
     return getEncodingValue(Reg) & 0xff;
   }
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 630f469eabf0..f581e69980c7 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,11 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AMDGPU.h"
 #include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
 #include "SIDefines.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constants.h"
@@ -27,7 +28,6 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
@@ -38,7 +38,6 @@
 
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 
-
 #define GET_INSTRINFO_NAMED_OPS
 #include "AMDGPUGenInstrInfo.inc"
 #undef GET_INSTRINFO_NAMED_OPS
@@ -104,6 +103,11 @@ namespace AMDGPU {
 namespace IsaInfo {
 
 IsaVersion getIsaVersion(const FeatureBitset &Features) {
+  // SI.
+  if (Features.test(FeatureISAVersion6_0_0))
+    return {6, 0, 0};
+  if (Features.test(FeatureISAVersion6_0_1))
+    return {6, 0, 1};
   // CI.
   if (Features.test(FeatureISAVersion7_0_0))
     return {7, 0, 0};
@@ -111,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
     return {7, 0, 1};
   if (Features.test(FeatureISAVersion7_0_2))
     return {7, 0, 2};
+  if (Features.test(FeatureISAVersion7_0_3))
+    return {7, 0, 3};
 
   // VI.
   if (Features.test(FeatureISAVersion8_0_0))
@@ -131,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
     return {9, 0, 0};
   if (Features.test(FeatureISAVersion9_0_1))
     return {9, 0, 1};
+  if (Features.test(FeatureISAVersion9_0_2))
+    return {9, 0, 2};
+  if (Features.test(FeatureISAVersion9_0_3))
+    return {9, 0, 3};
 
   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
     return {0, 0, 0};
@@ -327,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
   Header.private_segment_alignment = 4;
 }
 
-MCSection *getHSATextSection(MCContext &Ctx) {
-  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
-                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
-                           ELF::SHF_EXECINSTR |
-                           ELF::SHF_AMDGPU_HSA_AGENT |
-                           ELF::SHF_AMDGPU_HSA_CODE);
-}
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
-  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
-                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
-                           ELF::SHF_AMDGPU_HSA_GLOBAL |
-                           ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
-  return  Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
-                            ELF::SHF_ALLOC | ELF::SHF_WRITE |
-                            ELF::SHF_AMDGPU_HSA_GLOBAL);
-}
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
-  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
-                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
-                           ELF::SHF_AMDGPU_HSA_AGENT);
-}
-
 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
   return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
 }
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19888ad7556a..eff0230d21f5 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const FeatureBitset &Features);
-MCSection *getHSATextSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
-
-MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
-
-MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
 
 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 77fc9551cff9..a8ca593f14ed 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>,
 def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
 def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
 def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
-def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
-def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbit>;
+def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
 def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
 def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
 def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
@@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64,
 }
 
 def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+
+let Constraints = "@earlyclobber $vdst" in {
 def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
   let SchedRW = [WriteDouble];
@@ -232,8 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
 
 let SubtargetPredicate = isCIVI in {
 
+let Constraints = "@earlyclobber $vdst" in {
 def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
 def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+} // End Constraints = "@earlyclobber $vdst"
 
 let isCommutable = 1 in {
 def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 14e197f477f1..f9da036c7e46 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -23,6 +23,8 @@
 #include "MCTargetDesc/ARMMCExpr.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -43,9 +45,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index f8b65573f9cd..8715657ad5e2 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -21,9 +21,9 @@
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b18ed509ed23..b4fb292c0116 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -11,17 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMBaseRegisterInfo.h"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
 #include "ARMFrameLowering.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index a33d025d114e..a7ac9a1dca6e 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -21,6 +21,7 @@
 
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 
 using namespace llvm;
@@ -122,8 +123,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
 
     unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
                           MRI.createGenericVirtualRegister(LLT::scalar(32))};
-    MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0);
-    MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32);
+    MIRBuilder.buildUnmerge(NewRegs, Arg.Reg);
 
     bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
     if (!IsLittle)
@@ -339,7 +339,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     if (!IsLittle)
       std::swap(NewRegs[0], NewRegs[1]);
 
-    MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32});
+    MIRBuilder.buildMerge(Arg.Reg, NewRegs);
 
     return 1;
   }
@@ -461,7 +461,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   MachineFunction &MF = MIRBuilder.getMF();
   const auto &TLI = *getTLI<ARMTargetLowering>();
   const auto &DL = MF.getDataLayout();
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const auto &STI = MF.getSubtarget();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   if (MF.getSubtarget<ARMSubtarget>().genLongCalls())
@@ -473,6 +474,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // registers, but don't insert it yet.
   auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask(
       TRI->getCallPreservedMask(MF, CallConv));
+  if (Callee.isReg()) {
+    auto CalleeReg = Callee.getReg();
+    if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg))
+      MIB->getOperand(0).setReg(constrainOperandRegClass(
+          MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
+          *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0));
+  }
 
   SmallVector<ArgInfo, 8> ArgInfos;
   for (auto Arg : OrigArgs) {
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 6434df317aa8..667337dc9267 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -21,10 +21,10 @@
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 4f6a73b5980d..384f80356cc8 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -26,8 +26,8 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index c2b2502843c0..16b54e8848c2 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -20,9 +20,9 @@
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 949d821e36b2..5b2d093e8f0d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,11 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMISelLowering.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMCallingConv.h"
 #include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMPerfectShuffle.h"
 #include "ARMRegisterInfo.h"
@@ -29,13 +29,13 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/VectorUtils.h"
@@ -61,7 +61,6 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/Function.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -103,8 +102,8 @@
 #include <cstdlib>
 #include <iterator>
 #include <limits>
-#include <tuple>
 #include <string>
+#include <tuple>
 #include <utility>
 #include <vector>
 
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 0f225156d4ca..817b567db767 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1958,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
                  [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
                                           (f64 DPR:$Ddin)))]>,
               RegConstraint<"$Ddin = $Dd">,
-              Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+              Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+              Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
 
 def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
                   (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1966,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
                   [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
                                            SPR:$Sdin))]>,
               RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+              Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+              Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
   // Some single precision VFP instructions may be executed on both NEON and
   // VFP pipelines.
 }
@@ -1976,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0,
                   IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm",
                   []>,
               RegConstraint<"$Sdin = $Sd">,
-              Requires<[HasFullFP16,UseFusedMAC]>;
+              Requires<[HasFullFP16,UseFusedMAC]>,
+              Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
 
 def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
           (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index b1f059835ff5..2ae3bad4076b 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -127,34 +127,30 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
   return true;
 }
 
-static bool selectSequence(MachineInstrBuilder &MIB,
-                           const ARMBaseInstrInfo &TII,
-                           MachineRegisterInfo &MRI,
-                           const TargetRegisterInfo &TRI,
-                           const RegisterBankInfo &RBI) {
-  assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP");
+static bool selectMergeValues(MachineInstrBuilder &MIB,
+                              const ARMBaseInstrInfo &TII,
+                              MachineRegisterInfo &MRI,
+                              const TargetRegisterInfo &TRI,
+                              const RegisterBankInfo &RBI) {
+  assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP");
 
-  // We only support G_SEQUENCE as a way to stick together two scalar GPRs
+  // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
   // into one DPR.
   unsigned VReg0 = MIB->getOperand(0).getReg();
   (void)VReg0;
   assert(MRI.getType(VReg0).getSizeInBits() == 64 &&
          RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID &&
-         "Unsupported operand for G_SEQUENCE");
+         "Unsupported operand for G_MERGE_VALUES");
   unsigned VReg1 = MIB->getOperand(1).getReg();
   (void)VReg1;
   assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
          RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
-         "Unsupported operand for G_SEQUENCE");
-  unsigned VReg2 = MIB->getOperand(3).getReg();
+         "Unsupported operand for G_MERGE_VALUES");
+  unsigned VReg2 = MIB->getOperand(2).getReg();
   (void)VReg2;
   assert(MRI.getType(VReg2).getSizeInBits() == 32 &&
          RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID &&
-         "Unsupported operand for G_SEQUENCE");
-
-  // Remove the operands corresponding to the offsets.
-  MIB->RemoveOperand(4);
-  MIB->RemoveOperand(2);
+         "Unsupported operand for G_MERGE_VALUES");
 
   MIB->setDesc(TII.get(ARM::VMOVDRR));
   MIB.add(predOps(ARMCC::AL));
@@ -162,30 +158,32 @@ static bool selectSequence(MachineInstrBuilder &MIB,
   return true;
 }
 
-static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
-                          MachineRegisterInfo &MRI,
-                          const TargetRegisterInfo &TRI,
-                          const RegisterBankInfo &RBI) {
-  assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP");
+static bool selectUnmergeValues(MachineInstrBuilder &MIB,
+                                const ARMBaseInstrInfo &TII,
+                                MachineRegisterInfo &MRI,
+                                const TargetRegisterInfo &TRI,
+                                const RegisterBankInfo &RBI) {
+  assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP");
 
-  // We only support G_EXTRACT as a way to break up one DPR into two GPRs.
+  // We only support G_UNMERGE_VALUES as a way to break up one DPR into two
+  // GPRs.
   unsigned VReg0 = MIB->getOperand(0).getReg();
   (void)VReg0;
   assert(MRI.getType(VReg0).getSizeInBits() == 32 &&
          RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID &&
-         "Unsupported operand for G_EXTRACT");
+         "Unsupported operand for G_UNMERGE_VALUES");
   unsigned VReg1 = MIB->getOperand(1).getReg();
   (void)VReg1;
-  assert(MRI.getType(VReg1).getSizeInBits() == 64 &&
-         RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID &&
-         "Unsupported operand for G_EXTRACT");
-  assert(MIB->getOperand(2).getImm() % 32 == 0 &&
-         "Unsupported operand for G_EXTRACT");
+  assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
+         RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+         "Unsupported operand for G_UNMERGE_VALUES");
+  unsigned VReg2 = MIB->getOperand(2).getReg();
+  (void)VReg2;
+  assert(MRI.getType(VReg2).getSizeInBits() == 64 &&
+         RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+         "Unsupported operand for G_UNMERGE_VALUES");
 
-  // Remove the operands corresponding to the offsets.
-  MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32);
-
-  MIB->setDesc(TII.get(ARM::VGETLNi32));
+  MIB->setDesc(TII.get(ARM::VMOVRRD));
   MIB.add(predOps(ARMCC::AL));
 
   return true;
@@ -407,13 +405,13 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
     MIB.addImm(0).add(predOps(ARMCC::AL));
     break;
   }
-  case G_SEQUENCE: {
-    if (!selectSequence(MIB, TII, MRI, TRI, RBI))
+  case G_MERGE_VALUES: {
+    if (!selectMergeValues(MIB, TII, MRI, TRI, RBI))
       return false;
     break;
   }
-  case G_EXTRACT: {
-    if (!selectExtract(MIB, TII, MRI, TRI, RBI))
+  case G_UNMERGE_VALUES: {
+    if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI))
       return false;
     break;
   }
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 5bf6c7aed6b8..2d490b7c303e 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -45,7 +45,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
     setAction({Op, 1, p0}, Legal);
   }
 
-  for (unsigned Op : {G_ADD, G_SUB, G_MUL}) {
+  for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) {
     for (auto Ty : {s1, s8, s16})
       setAction({Op, Ty}, WidenScalar);
     setAction({Op, s32}, Legal);
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 72fcf7cd6a4f..7a452d4a2095 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -33,7 +34,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 9e9c1ba6c114..13acea3c28a9 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -25,9 +25,9 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index a20997c95cd9..f59b075e6dd9 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -221,6 +221,9 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case G_ADD:
   case G_SUB:
   case G_MUL:
+  case G_AND:
+  case G_OR:
+  case G_XOR:
   case G_SDIV:
   case G_UDIV:
   case G_SEXT:
@@ -252,30 +255,32 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     OperandsMapping =
         getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
     break;
-  case G_SEQUENCE: {
-    // We only support G_SEQUENCE for creating a double precision floating point
-    // value out of two GPRs.
+  case G_MERGE_VALUES: {
+    // We only support G_MERGE_VALUES for creating a double precision floating
+    // point value out of two GPRs.
     LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
-    LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+    LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
     if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 ||
         Ty2.getSizeInBits() != 32)
       return getInvalidInstructionMapping();
     OperandsMapping =
         getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
-                            &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
-                            &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
+                            &ARM::ValueMappings[ARM::GPR3OpsIdx],
+                            &ARM::ValueMappings[ARM::GPR3OpsIdx]});
     break;
   }
-  case G_EXTRACT: {
-    // We only support G_EXTRACT for splitting a double precision floating point
-    // value into two GPRs.
+  case G_UNMERGE_VALUES: {
+    // We only support G_UNMERGE_VALUES for splitting a double precision
+    // floating point value into two GPRs.
     LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
-    if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 ||
-        MI.getOperand(2).getImm() % 32 != 0)
+    LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+    if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 ||
+        Ty2.getSizeInBits() != 64)
       return getInvalidInstructionMapping();
-    OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
-                                          &ARM::ValueMappings[ARM::DPR3OpsIdx],
-                                          nullptr, nullptr});
+    OperandsMapping =
+        getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+                            &ARM::ValueMappings[ARM::GPR3OpsIdx],
+                            &ARM::ValueMappings[ARM::DPR3OpsIdx]});
     break;
   }
   default:
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b8a708a20a95..d9d0c27c6304 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -28,10 +28,10 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetParser.h"
+#include "llvm/Target/TargetOptions.h"
 #include <cassert>
 #include <string>
 
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index f5e4043882ff..c0506cfda612 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -37,6 +37,7 @@
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Attributes.h"
@@ -389,6 +390,20 @@ public:
     return getTM<ARMBaseTargetMachine>();
   }
 
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+    // No ARM-specific DAG mutations are registered yet; add them here.
+    return DAG;
+  }
+
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+    // No ARM-specific DAG mutations are registered yet; add them here.
+    return DAG;
+  }
+
   void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index edbf2b99126c..a5b27abeb27f 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -7,17 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMTargetObjectFile.h"
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
-#include "ARMTargetObjectFile.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/SectionKind.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ada816c16389..19fba3033bb2 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -17,6 +17,8 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -39,10 +41,8 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetParser.h"
@@ -1026,6 +1026,15 @@ public:
             ARM_AM::getSOImmVal(-Value) != -1);
   }
   bool isT2SOImm() const {
+    // If we have an immediate that's not a constant, treat it as an expression
+    // needing a fixup.
+    if (isImm() && !isa<MCConstantExpr>(getImm())) {
+      // We want to avoid matching :upper16: and :lower16: as we want these
+      // expressions to match in isImm0_65535Expr()
+      const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(getImm());
+      return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
+                             ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16));
+    }
     if (!isImm()) return false;
     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
     if (!CE) return false;
@@ -8404,7 +8413,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
     // wide encoding wasn't explicit.
     if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
         !isARMLowRegister(Inst.getOperand(0).getReg()) ||
-        (unsigned)Inst.getOperand(2).getImm() > 255 ||
+        (Inst.getOperand(2).isImm() &&
+         (unsigned)Inst.getOperand(2).getImm() > 255) ||
         ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
          (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
         (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
@@ -8556,7 +8566,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
     // If we can use the 16-bit encoding and the user didn't explicitly
     // request the 32-bit variant, transform it here.
     if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
-        (unsigned)Inst.getOperand(1).getImm() <= 255 &&
+        (Inst.getOperand(1).isImm() &&
+         (unsigned)Inst.getOperand(1).getImm() <= 255) &&
         ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
           Inst.getOperand(4).getReg() == ARM::CPSR) ||
          (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e812d32cc76f..585726208a8d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -20,8 +20,8 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index b0d1d3fb9ef0..716492ea2566 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -7,15 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMAsmBackend.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMAsmBackendDarwin.h"
 #include "MCTargetDesc/ARMAsmBackendELF.h"
 #include "MCTargetDesc/ARMAsmBackendWinCOFF.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -31,10 +33,8 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_t2_movt_hi16", 0, 20, 0},
       {"fixup_t2_movw_lo16", 0, 20, 0},
       {"fixup_arm_mod_imm", 0, 12, 0},
+      {"fixup_t2_so_imm", 0, 26, 0},
   };
   const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
       // This table *must* be in the order that the fixup_* kinds are defined in
@@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_t2_movt_hi16", 12, 20, 0},
       {"fixup_t2_movw_lo16", 12, 20, 0},
       {"fixup_arm_mod_imm", 20, 12, 0},
+      {"fixup_t2_so_imm", 26, 6, 0},
   };
 
   if (Kind < FirstTargetFixupKind)
@@ -693,6 +695,23 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
       return 0;
     }
     return Value;
+  case ARM::fixup_t2_so_imm: {
+    Value = ARM_AM::getT2SOImmVal(Value);
+    if ((int64_t)Value < 0) {
+      Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value");
+      return 0;
+    }
+    // Value will contain a 12-bit value broken up into a 4-bit shift in bits
+    // 11:8 and the 8-bit immediate in 7:0. The instruction has the immediate
+    // in 7:0. The 4-bit shift is split up into i:imm3 where i is placed at bit
+    // 10 of the upper half-word and imm3 is placed at 14:12 of the lower
+    // half-word.
+    uint64_t EncValue = 0;
+    EncValue |= (Value & 0x800) << 15;
+    EncValue |= (Value & 0x700) << 4;
+    EncValue |= (Value & 0xff);
+    return swapHalfWords(EncValue, IsLittleEndian);
+  }
   }
 }
 
@@ -704,16 +723,17 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
                                       bool &IsResolved) {
   const MCSymbolRefExpr *A = Target.getSymA();
   const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
+  const unsigned FixupKind = Fixup.getKind() ;
   // MachO (the only user of "Value") tries to make .o files that look vaguely
   // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit
   // into the addend if possible. Other relocation types don't want this bit
   // though (branches couldn't encode it if it *was* present, and no other
   // relocations exist) and it can interfere with checking valid expressions.
-  if ((unsigned)Fixup.getKind() == FK_Data_4 ||
-      (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 ||
-      (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 ||
-      (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 ||
-      (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) {
+  if (FixupKind == FK_Data_4 ||
+      FixupKind == ARM::fixup_arm_movw_lo16 ||
+      FixupKind == ARM::fixup_arm_movt_hi16 ||
+      FixupKind == ARM::fixup_t2_movw_lo16 ||
+      FixupKind == ARM::fixup_t2_movt_hi16) {
     if (Sym) {
       if (Asm.isThumbFunc(Sym))
         Value |= 1;
@@ -729,23 +749,27 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
     // linker can handle it. GNU AS produces an error in this case.
     if (Sym->isExternal() || Value >= 0x400004)
       IsResolved = false;
-    // When an ARM function is called from a Thumb function, produce a
-    // relocation so the linker will use the correct branch instruction for ELF
-    // binaries.
-    if (Sym->isELF()) {
-      unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType();
-      if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC) &&
-          !Asm.isThumbFunc(Sym))
+  }
+  // Create relocations for unconditional branches to function symbols with a
+  // different execution mode in ELF binaries.
+  if (Sym && Sym->isELF()) {
+    unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType();
+    if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
+      if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
+        IsResolved = false;
+      if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
+                                    FixupKind == ARM::fixup_arm_thumb_bl ||
+                                    FixupKind == ARM::fixup_t2_uncondbranch))
         IsResolved = false;
     }
   }
   // We must always generate a relocation for BL/BLX instructions if we have
   // a symbol to reference, as the linker relies on knowing the destination
   // symbol's thumb-ness to get interworking right.
-  if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
-            (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
-            (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
-            (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+  if (A && (FixupKind == ARM::fixup_arm_thumb_blx ||
+            FixupKind == ARM::fixup_arm_blx ||
+            FixupKind == ARM::fixup_arm_uncondbl ||
+            FixupKind == ARM::fixup_arm_condbl))
     IsResolved = false;
 }
 
@@ -792,6 +816,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   case ARM::fixup_arm_movw_lo16:
   case ARM::fixup_t2_movt_hi16:
   case ARM::fixup_t2_movw_lo16:
+  case ARM::fixup_t2_so_imm:
     return 4;
 
   case FK_SecRel_2:
@@ -844,6 +869,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
   case ARM::fixup_t2_movt_hi16:
   case ARM::fixup_t2_movw_lo16:
   case ARM::fixup_arm_mod_imm:
+  case ARM::fixup_t2_so_imm:
     // Instruction size is 4 bytes.
     return 4;
   }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 09dc0173ade6..bd729fabedf5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -11,7 +11,7 @@
 #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H
 
 #include "ARMAsmBackend.h"
-#include "llvm/Support/MachO.h"
+#include "llvm/BinaryFormat/MachO.h"
 
 namespace llvm {
 class ARMAsmBackendDarwin : public ARMAsmBackend {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index e1fa24571820..59f31be69d58 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -9,12 +9,12 @@
 
 #include "MCTargetDesc/ARMFixupKinds.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 4d6c52f3cd49..93f4006cee87 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
@@ -43,12 +44,11 @@
 #include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/ARMEHABI.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetParser.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <climits>
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 3fe2302bdd37..9f6c5d7bf920 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -110,6 +110,9 @@ enum Fixups {
   // fixup_arm_mod_imm - Fixup for mod_imm
   fixup_arm_mod_imm,
 
+  // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand
+  fixup_t2_so_imm,
+
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index d9df2c6da7ec..f1f35f409900 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -339,7 +339,17 @@ public:
   unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op,
                            SmallVectorImpl<MCFixup> &Fixups,
                            const MCSubtargetInfo &STI) const {
-    unsigned SoImm = MI.getOperand(Op).getImm();
+    const MCOperand &MO = MI.getOperand(Op);
+
+    // Support for fixups (MCFixup)
+    if (MO.isExpr()) {
+      const MCExpr *Expr = MO.getExpr();
+      // Fixups resolve to plain values that need to be encoded.
+      MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm);
+      Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+      return 0;
+    }
+    unsigned SoImm = MO.getImm();
     unsigned Encoded =  ARM_AM::getT2SOImmVal(SoImm);
     assert(Encoded != ~0U && "Not a Thumb2 so_imm value?");
     return Encoded;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 477755157040..b8a8b1f7619a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMMCTargetDesc.h"
 #include "ARMBaseInfo.h"
 #include "ARMMCAsmInfo.h"
-#include "ARMMCTargetDesc.h"
 #include "InstPrinter/ARMInstPrinter.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCELFStreamer.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 34c770440e1b..5516a1bdb03d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -9,10 +9,10 @@
 
 #include "ARMMCExpr.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm-c/Disassembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm-c/Disassembler.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index b77181f29b2d..4a8139dea668 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,10 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -21,7 +22,6 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 7ae2f864d79d..00505a103e00 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -9,13 +9,13 @@
 
 #include "MCTargetDesc/ARMFixupKinds.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index f10427e2ed57..0b6574c37de1 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -11,26 +11,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Thumb1FrameLowering.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
-#include "Thumb1FrameLowering.h"
 #include "Thumb1InstrInfo.h"
 #include "ThumbRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCDwarf.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 0ebf55924647..3a3920a2db32 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ARMSubtarget.h"
 #include "Thumb1InstrInfo.h"
+#include "ARMSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2e2dfe035e26..9125be96a07b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,16 +11,26 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Thumb2InstrInfo.h"
-#include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
 
 using namespace llvm;
 
@@ -30,7 +40,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
            cl::init(false));
 
 Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
-    : ARMBaseInstrInfo(STI), RI() {}
+    : ARMBaseInstrInfo(STI) {}
 
 /// Return the noop instruction to use for a noop.
 void Thumb2InstrInfo::getNoop(MCInst &NopInst) const {
@@ -539,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
     // Add cc_out operand if the original instruction did not have one.
     if (!HasCCOut)
       MI.addOperand(MachineOperand::CreateReg(0, false));
-
   } else {
-
     // AddrMode4 and AddrMode6 cannot handle any offset.
     if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
       return false;
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index c90475c28db7..d911dd97b1ac 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -14,10 +14,10 @@
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h
index 8e5cc5360ad4..5eadf7bdcef6 100644
--- a/lib/Target/AVR/AVR.h
+++ b/lib/Target/AVR/AVR.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_AVR_H
 #define LLVM_AVR_H
 
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index d6491ce5c3bf..f0c7b11895b4 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -18,8 +18,8 @@
 #include "InstPrinter/AVRInstPrinter.h"
 
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 11a47bad78ba..55f3f5cf428a 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -51,7 +51,6 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
 
 BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
-  const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget());
 
   // Reserve the intermediate result registers r1 and r2
   // The result of instructions like 'mul' is always stored here.
@@ -269,4 +268,3 @@ void AVRRegisterInfo::splitReg(unsigned Reg,
 }
 
 } // end of namespace llvm
-
diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp
index c228d051d771..556d69ec5234 100644
--- a/lib/Target/AVR/AVRSubtarget.cpp
+++ b/lib/Target/AVR/AVRSubtarget.cpp
@@ -13,7 +13,7 @@
 
 #include "AVRSubtarget.h"
 
-#include "llvm/Support/ELF.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Support/TargetRegistry.h"
 
 #include "AVR.h"
diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h
index a37849c3f3f7..b0e634f86168 100644
--- a/lib/Target/AVR/AVRSubtarget.h
+++ b/lib/Target/AVR/AVRSubtarget.h
@@ -14,10 +14,9 @@
 #ifndef LLVM_AVR_SUBTARGET_H
 #define LLVM_AVR_SUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 #include "AVRFrameLowering.h"
 #include "AVRISelLowering.h"
diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp
index 2ab0b1080c6a..91d2a8737b87 100644
--- a/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/lib/Target/AVR/AVRTargetMachine.cpp
@@ -15,12 +15,12 @@
 
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/TargetRegistry.h"
 
-#include "AVRTargetObjectFile.h"
 #include "AVR.h"
+#include "AVRTargetObjectFile.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
 
 namespace llvm {
diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp
index af14d9292f27..0cebb0f043f9 100644
--- a/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -9,12 +9,12 @@
 
 #include "AVRTargetObjectFile.h"
 
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
 
 #include "AVR.h"
 
diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 5b0398c0ca34..cf52e552978f 100644
--- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -18,12 +18,12 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
diff --git a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index d2a21fb64635..e69accfa9393 100644
--- a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -16,11 +16,11 @@
 #include "AVRSubtarget.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
 
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 713754821005..1e61eccf775f 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -1,8 +1,8 @@
 #include "AVRELFStreamer.h"
 
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/FormattedStream.h"
 
 #include "AVRMCTargetDesc.h"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
index 400296b8409b..085afd23a83c 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -9,11 +9,11 @@
 
 #include "AVRMCExpr.h"
 
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAsmLayout.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index a4fa5c0a9310..826430e94b9c 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AVRMCTargetDesc.h"
 #include "AVRELFStreamer.h"
 #include "AVRMCAsmInfo.h"
-#include "AVRMCTargetDesc.h"
 #include "AVRTargetStreamer.h"
 #include "InstPrinter/AVRInstPrinter.h"
 
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index c5201465e074..fcd903b7a4a8 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -18,10 +18,10 @@
 #include "BPFTargetMachine.h"
 #include "InstPrinter/BPFInstPrinter.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp
index e38facead922..5351cfa95020 100644
--- a/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/lib/Target/BPF/BPFInstrInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BPF.h"
 #include "BPFInstrInfo.h"
+#include "BPF.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 7925bee9c587..273843e92701 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -11,17 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BPF.h"
 #include "BPFRegisterInfo.h"
+#include "BPF.h"
 #include "BPFSubtarget.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "BPFGenRegisterInfo.inc"
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index cf8e73540904..d84b0a80fc0c 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BPF.h"
 #include "BPFTargetMachine.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "BPF.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index b98621ca4749..a1d732c339e5 100644
--- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -15,10 +15,10 @@
 #include "BPFSubtarget.h"
 #include "MCTargetDesc/BPFMCTargetDesc.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
index ffd29f3ea991..64e986fe0f04 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "BPF.h"
 #include "BPFInstPrinter.h"
+#include "BPF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index ebe9abd8ffac..d5e1d7706edc 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixup.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstdint>
 
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index b58409730de0..797904e1c976 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/BPFMCTargetDesc.h"
 #include "BPF.h"
 #include "InstPrinter/BPFInstPrinter.h"
-#include "MCTargetDesc/BPFMCTargetDesc.h"
 #include "MCTargetDesc/BPFMCAsmInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 3df673eaeb4b..d1c97c9987e1 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H
 #define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 class MCAsmBackend;
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index b0b2644fffbe..c19e636d79ca 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -17,11 +17,12 @@
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "MCTargetDesc/HexagonShuffler.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDirectives.h"
@@ -42,13 +43,12 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cctype>
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 07767d1037a9..5b02aa3ca3ae 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -65,9 +65,9 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include <iterator>
 #include <cassert>
 #include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 87c212b6163f..586220dfec26 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -12,12 +12,12 @@
 #include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCChecker.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
@@ -25,8 +25,8 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index bb5128e7500f..e689483a0999 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "HexagonAsmPrinter.h"
+#include "Hexagon.h"
 #include "HexagonMachineFunctionInfo.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
@@ -23,6 +23,7 @@
 #include "MCTargetDesc/HexagonMCShuffler.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -43,7 +44,6 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 8502bf24c02f..14c682c6df4b 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -13,8 +13,8 @@
 #include "HexagonTargetMachine.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index af0f8b265bda..730026121d3b 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "HexagonBitTracker.h"
+#include "Hexagon.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonTargetMachine.h"
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h
index 717480314d16..769ec7044a0e 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.h
+++ b/lib/Target/Hexagon/HexagonBlockRanges.h
@@ -14,8 +14,8 @@
 #include <cassert>
 #include <map>
 #include <set>
-#include <vector>
 #include <utility>
+#include <vector>
 
 namespace llvm {
 
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index a07ba77e6f3e..b5b46f2b7d19 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -175,7 +175,8 @@ namespace {
       None      = 0,
       Root      = 0x01,
       Internal  = 0x02,
-      Used      = 0x04
+      Used      = 0x04,
+      InBounds  = 0x08
     };
 
     uint32_t Flags;
@@ -231,6 +232,11 @@ namespace {
         OS << ',';
       OS << "used";
     }
+    if (GN.Flags & GepNode::InBounds) {
+      if (Comma)
+        OS << ',';
+      OS << "inbounds";
+    }
     OS << "} ";
     if (GN.Flags & GepNode::Root)
       OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')';
@@ -334,10 +340,11 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
   DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
   GepNode *N = new (*Mem) GepNode;
   Value *PtrOp = GepI->getPointerOperand();
+  uint32_t InBounds = GepI->isInBounds() ? GepNode::InBounds : 0;
   ValueToNodeMap::iterator F = NM.find(PtrOp);
   if (F == NM.end()) {
     N->BaseVal = PtrOp;
-    N->Flags |= GepNode::Root;
+    N->Flags |= GepNode::Root | InBounds;
   } else {
     // If PtrOp was a GEP instruction, it must have already been processed.
     // The ValueToNodeMap entry for it is the last gep node in the generated
@@ -373,7 +380,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
     Value *Op = *OI;
     GepNode *Nx = new (*Mem) GepNode;
     Nx->Parent = PN;  // Link Nx to the previous node.
-    Nx->Flags |= GepNode::Internal;
+    Nx->Flags |= GepNode::Internal | InBounds;
     Nx->PTy = PtrTy;
     Nx->Idx = Op;
     Nodes.push_back(Nx);
@@ -1081,7 +1088,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
   GepNode *RN = NA[0];
   assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root");
 
-  Value *NewInst = nullptr;
+  GetElementPtrInst *NewInst = nullptr;
   Value *Input = RN->BaseVal;
   Value **IdxList = new Value*[Num+1];
   unsigned nax = 0;
@@ -1112,6 +1119,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
     Type *InpTy = Input->getType();
     Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
     NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
+    NewInst->setIsInBounds(RN->Flags & GepNode::InBounds);
     DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
     Input = NewInst;
   } while (nax <= Num);
diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 783b916e04b0..aa68f6cfdfc1 100644
--- a/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -2276,7 +2276,7 @@ Undetermined:
       goto Undetermined;
 
     uint32_t Props = PredC.properties();
-    bool CTrue = false, CFalse = false;;
+    bool CTrue = false, CFalse = false;
     if (Props & ConstantProperties::Zero)
       CFalse = true;
     else if (Props & ConstantProperties::NonZero)
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 8118c8eb149d..6b4f53428256 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 #include "HexagonInstrInfo.h"
 #include "HexagonSubtarget.h"
-#include "llvm/PassSupport.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -22,6 +21,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/PassSupport.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 67af947e089d..03c4a83594b3 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -65,9 +65,9 @@
 #include "HexagonInstrInfo.h"
 #include "HexagonSubtarget.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index 015d3b840e6f..23d4e2610d9a 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -12,10 +12,9 @@
 // form.
 //===----------------------------------------------------------------------===//
 
-
-#include "llvm/ADT/DenseMap.h"
 #include "Hexagon.h"
 #include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 25018b9ed510..18e49c69b8e3 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -10,8 +10,8 @@
 
 #define DEBUG_TYPE "hexagon-pei"
 
-#include "HexagonBlockRanges.h"
 #include "HexagonFrameLowering.h"
+#include "HexagonBlockRanges.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonMachineFunctionInfo.h"
 #include "HexagonRegisterInfo.h"
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
index c99ad5130aef..7c6de6d513e8 100644
--- a/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -14,10 +14,10 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index 54d99d399f88..bf31e1699284 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -17,9 +17,9 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -34,8 +34,8 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 #include <cassert>
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index 85222944c77c..3c37d9ebb0eb 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -40,8 +40,8 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
-#include <limits>
 #include <iterator>
+#include <limits>
 #include <utility>
 
 using namespace llvm;
@@ -235,8 +235,11 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
     unsigned DR = MI->getOperand(0).getReg();
     if (isRegPair(DR))
       continue;
+    MachineOperand &PredOp = MI->getOperand(1);
+    if (PredOp.isUndef())
+      continue;
 
-    unsigned PR = MI->getOperand(1).getReg();
+    unsigned PR = PredOp.getReg();
     unsigned Idx = I2X.lookup(MI);
     CondsetMap::iterator F = CM.find(DR);
     bool IfTrue = HII->isPredicatedTrue(Opc);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 4c6c6eeafbe0..afed894cfb9a 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "HexagonISelLowering.h"
+#include "Hexagon.h"
 #include "HexagonMachineFunctionInfo.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
@@ -26,8 +26,8 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/BasicBlock.h"
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index b76da727237c..f43101fa456d 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "HexagonInstrInfo.h"
 #include "Hexagon.h"
 #include "HexagonHazardRecognizer.h"
-#include "HexagonInstrInfo.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -57,9 +57,9 @@ using namespace llvm;
 
 #define GET_INSTRINFO_CTOR_DTOR
 #define GET_INSTRMAP_INFO
-#include "HexagonGenInstrInfo.inc"
-#include "HexagonGenDFAPacketizer.inc"
 #include "HexagonDepTimingClasses.h"
+#include "HexagonGenDFAPacketizer.inc"
+#include "HexagonGenInstrInfo.inc"
 
 cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
   cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 9aa185fc85a6..b748b58bc0ae 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -23,11 +23,11 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 #include <algorithm>
 #include <array>
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 324108284a9a..4602de979024 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -563,40 +563,33 @@ void ConvergingVLIWScheduler::readyQueueVerboseDump(
 }
 #endif
 
-/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
-/// of SU, return it, otherwise return null.
-static SUnit *getSingleUnscheduledPred(SUnit *SU) {
-  SUnit *OnlyAvailablePred = nullptr;
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    SUnit &Pred = *I->getSUnit();
-    if (!Pred.isScheduled) {
-      // We found an available, but not scheduled, predecessor.  If it's the
-      // only one we have found, keep track of it... otherwise give up.
-      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
-        return nullptr;
-      OnlyAvailablePred = &Pred;
-    }
+/// isSingleUnscheduledPred - Return true if SU2 is the only unscheduled
+/// predecessor of SU (SU2 may appear in SU's Preds list more than once).
+static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
+  if (SU->NumPredsLeft == 0)
+    return false;
+
+  for (auto &Pred : SU->Preds) {
+    // An unscheduled predecessor other than SU2 rules SU2 out.
+    if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
+      return false;
   }
-  return OnlyAvailablePred;
+
+  return true;
 }
 
-/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
-/// of SU, return it, otherwise return null.
-static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
-  SUnit *OnlyAvailableSucc = nullptr;
-  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    SUnit &Succ = *I->getSUnit();
-    if (!Succ.isScheduled) {
-      // We found an available, but not scheduled, successor.  If it's the
-      // only one we have found, keep track of it... otherwise give up.
-      if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
-        return nullptr;
-      OnlyAvailableSucc = &Succ;
-    }
+/// isSingleUnscheduledSucc - Return true if SU2 is the only unscheduled
+/// successor of SU (SU2 may appear in SU's Succs list more than once).
+static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
+  if (SU->NumSuccsLeft == 0)
+    return false;
+
+  for (auto &Succ : SU->Succs) {
+    // An unscheduled successor other than SU2 rules SU2 out.
+    if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
+      return false;
   }
-  return OnlyAvailableSucc;
+  return true;
 }
 
 // Constants used to denote relative importance of
@@ -673,12 +666,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
     // Count the number of nodes that
     // this node is the sole unscheduled node for.
     for (const SDep &SI : SU->Succs)
-      if (getSingleUnscheduledPred(SI.getSUnit()) == SU)
+      if (isSingleUnscheduledPred(SI.getSUnit(), SU))
         ++NumNodesBlocking;
   } else {
     // How many unscheduled predecessors block this node?
     for (const SDep &PI : SU->Preds)
-      if (getSingleUnscheduledSucc(PI.getSUnit()) == SU)
+      if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
         ++NumNodesBlocking;
   }
   ResCount += (NumNodesBlocking * ScaleTwo);
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index 70ed123bc898..f269b74fc447 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -1,3 +1,12 @@
+//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
 // Pattern fragment that combines the value type and the register class
 // into a single parameter.
 
@@ -345,7 +354,7 @@ def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
          (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
 def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1),
          (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
+def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1),
          (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
 def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1),
          (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
@@ -674,6 +683,8 @@ def I32toI1: OutPatFrag<(ops node:$Rs),
 defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
 def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
 
+def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)),
+         (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>;
 def: Pat<(sra I64:$src, u6_0ImmPred:$u6),
          (S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
 def: Pat<(srl I64:$src, u6_0ImmPred:$u6),
@@ -786,27 +797,19 @@ def: Pat<(i64 (sext_inreg I64:$src1, i16)),
 def: Pat<(i64 (sext_inreg I64:$src1, i8)),
          (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
 
-// We want to prevent emitting pnot's as much as possible.
-// Map brcond with an unsupported setcc to a J2_jumpf.
-def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)),
-                        bb:$offset),
-      (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2),
-                bb:$offset)>;
-
-def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)),
-                        bb:$offset),
-      (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset),
-         (J2_jumpf PredRegs:$src1, bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset),
-         (J2_jumpt PredRegs:$src1, bb:$offset)>;
+def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset),
+         (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>;
+def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset),
+         (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>;
+def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset),
+         (J2_jumpf PredRegs:$Pu, bb:$offset)>;
+def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset),
+         (J2_jumpt PredRegs:$Pu, bb:$offset)>;
 
 // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
-def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset),
-        (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)),
-                  bb:$offset)>;
+def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset),
+         (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>;
+
 
 // Map from a 64-bit select to an emulated 64-bit mux.
 // Hexagon does not support 64-bit MUXes; so emulate with combines.
@@ -860,15 +863,13 @@ def: Pat<(i1 (setne I1:$src1, I1:$src2)),
 def: Pat<(i1 (setne I64:$src1, I64:$src2)),
          (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
 
-// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt).
-// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setge I32:$src1, I32:$src2)),
-      (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>;
+// rs >= rt -> rt <= rs
+def: Pat<(i1 (setge I32:$Rs, I32:$Rt)),
+         (C4_cmplte I32:$Rt, I32:$Rs)>;
 
-// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
 let AddedComplexity = 30 in
-def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)),
-         (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;
+def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)),
+         (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>;
 
 // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
 // rss >= rtt -> !(rtt > rss).
@@ -1634,9 +1635,14 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
 
 def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6),
          (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
+def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6),
+              (HexagonCONST32 tglobaladdr:$global)),
+         (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>;
 def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6),
          (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
-
+def: Pat<(add (mul I32:$Rs, I32:$Rt),
+              (HexagonCONST32 tglobaladdr:$global)),
+         (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>;
 def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)),
          (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>;
 def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)),
@@ -2129,6 +2135,11 @@ let AddedComplexity  = 30 in {
   def: Storea_pat<truncstorei8,  I32, u32_0ImmPred, PS_storerbabs>;
   def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
   def: Storea_pat<store,         I32, u32_0ImmPred, PS_storeriabs>;
+  def: Storea_pat<store,         I64, u32_0ImmPred, PS_storerdabs>;
+
+  def: Stoream_pat<truncstorei8,  I64, u32_0ImmPred, LoReg, PS_storerbabs>;
+  def: Stoream_pat<truncstorei16, I64, u32_0ImmPred, LoReg, PS_storerhabs>;
+  def: Stoream_pat<truncstorei32, I64, u32_0ImmPred, LoReg, PS_storeriabs>;
 }
 
 let AddedComplexity  = 30 in {
@@ -2137,6 +2148,19 @@ let AddedComplexity  = 30 in {
   def: Loada_pat<zextloadi8,  i32, u32_0ImmPred, PS_loadrubabs>;
   def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
   def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
+  def: Loada_pat<load,        i64, u32_0ImmPred, PS_loadrdabs>;
+
+  def: Loadam_pat<extloadi8,   i64, u32_0ImmPred, ToZext64, PS_loadrubabs>;
+  def: Loadam_pat<sextloadi8,  i64, u32_0ImmPred, ToSext64, PS_loadrbabs>;
+  def: Loadam_pat<zextloadi8,  i64, u32_0ImmPred, ToZext64, PS_loadrubabs>;
+
+  def: Loadam_pat<extloadi16,  i64, u32_0ImmPred, ToZext64, PS_loadruhabs>;
+  def: Loadam_pat<sextloadi16, i64, u32_0ImmPred, ToSext64, PS_loadrhabs>;
+  def: Loadam_pat<zextloadi16, i64, u32_0ImmPred, ToZext64, PS_loadruhabs>;
+
+  def: Loadam_pat<extloadi32,  i64, u32_0ImmPred, ToZext64, PS_loadriabs>;
+  def: Loadam_pat<sextloadi32, i64, u32_0ImmPred, ToSext64, PS_loadriabs>;
+  def: Loadam_pat<zextloadi32, i64, u32_0ImmPred, ToZext64, PS_loadriabs>;
 }
 
 // Indexed store word - global address.
@@ -2707,6 +2731,15 @@ def: Pat<(fneg F64:$Rs),
               (S2_togglebit_i (HiReg $Rs), 31), isub_hi,
               (i32 (LoReg $Rs)), isub_lo)>;
 
+def: Pat<(mul I64:$Rss, I64:$Rtt),
+         (A2_combinew
+           (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))),
+                             (LoReg $Rss),
+                             (HiReg $Rtt)),
+                    (LoReg $Rtt),
+                    (HiReg $Rss)),
+           (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>;
+
 def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
   return isAlignedMemNode(dyn_cast<MemSDNode>(N));
 }]>;
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 471e32221b29..db268b78cd73 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -13,8 +13,8 @@
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 14ecf297d351..c757b6ecdd00 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -23,8 +23,8 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
@@ -276,27 +276,27 @@ bool HexagonPassConfig::addInstSelector() {
   if (!NoOpt) {
     // Create logical operations on predicate registers.
     if (EnableGenPred)
-      addPass(createHexagonGenPredicate(), false);
+      addPass(createHexagonGenPredicate());
     // Rotate loops to expose bit-simplification opportunities.
     if (EnableLoopResched)
-      addPass(createHexagonLoopRescheduling(), false);
+      addPass(createHexagonLoopRescheduling());
     // Split double registers.
     if (!DisableHSDR)
       addPass(createHexagonSplitDoubleRegs());
     // Bit simplification.
     if (EnableBitSimplify)
-      addPass(createHexagonBitSimplify(), false);
+      addPass(createHexagonBitSimplify());
     addPass(createHexagonPeephole());
     printAndVerify("After hexagon peephole pass");
     // Constant propagation.
     if (!DisableHCP) {
-      addPass(createHexagonConstPropagationPass(), false);
-      addPass(&UnreachableMachineBlockElimID, false);
+      addPass(createHexagonConstPropagationPass());
+      addPass(&UnreachableMachineBlockElimID);
     }
     if (EnableGenInsert)
-      addPass(createHexagonGenInsert(), false);
+      addPass(createHexagonGenInsert());
     if (EnableEarlyIf)
-      addPass(createHexagonEarlyIfConversion(), false);
+      addPass(createHexagonEarlyIfConversion());
   }
 
   return false;
@@ -307,9 +307,9 @@ void HexagonPassConfig::addPreRegAlloc() {
     if (EnableExpandCondsets)
       insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
     if (!DisableStoreWidening)
-      addPass(createHexagonStoreWidening(), false);
+      addPass(createHexagonStoreWidening());
     if (!DisableHardwareLoops)
-      addPass(createHexagonHardwareLoops(), false);
+      addPass(createHexagonHardwareLoops());
   }
   if (TM->getOptLevel() >= CodeGenOpt::Default)
     addPass(&MachinePipelinerID);
@@ -320,16 +320,16 @@ void HexagonPassConfig::addPostRegAlloc() {
     if (EnableRDFOpt)
       addPass(createHexagonRDFOpt());
     if (!DisableHexagonCFGOpt)
-      addPass(createHexagonCFGOptimizer(), false);
+      addPass(createHexagonCFGOptimizer());
     if (!DisableAModeOpt)
-      addPass(createHexagonOptAddrMode(), false);
+      addPass(createHexagonOptAddrMode());
   }
 }
 
 void HexagonPassConfig::addPreSched2() {
-  addPass(createHexagonCopyToCombine(), false);
+  addPass(createHexagonCopyToCombine());
   if (getOptLevel() != CodeGenOpt::None)
-    addPass(&IfConverterID, false);
+    addPass(&IfConverterID);
   addPass(createHexagonSplitConst32AndConst64());
 }
 
@@ -337,17 +337,17 @@ void HexagonPassConfig::addPreEmitPass() {
   bool NoOpt = (getOptLevel() == CodeGenOpt::None);
 
   if (!NoOpt)
-    addPass(createHexagonNewValueJump(), false);
+    addPass(createHexagonNewValueJump());
 
-  addPass(createHexagonBranchRelaxation(), false);
+  addPass(createHexagonBranchRelaxation());
 
   // Create Packets.
   if (!NoOpt) {
     if (!DisableHardwareLoops)
-      addPass(createHexagonFixupHwLoops(), false);
+      addPass(createHexagonFixupHwLoops());
     // Generate MUX from pairs of conditional transfers.
     if (EnableGenMux)
-      addPass(createHexagonGenMux(), false);
+      addPass(createHexagonGenMux());
 
     addPass(createHexagonPacketizer(), false);
   }
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index c9c4f95dbaaa..4dacb1501392 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalObject.h"
@@ -28,7 +29,6 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index fa08afe4019d..7667bfb7a0eb 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -16,10 +16,10 @@
 // prune the dependence.
 //
 //===----------------------------------------------------------------------===//
+#include "HexagonVLIWPacketizer.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
-#include "HexagonVLIWPacketizer.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 904403543e18..545c8b6b2acd 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -12,9 +12,9 @@
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCChecker.h"
 #include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index dd790fd41257..1929152129fa 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "HexagonAsmPrinter.h"
 #include "HexagonInstPrinter.h"
+#include "HexagonAsmPrinter.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 70410ff03a64..50f00d1aaeac 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/HexagonMCCodeEmitter.h"
 #include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonFixupKinds.h"
-#include "MCTargetDesc/HexagonMCCodeEmitter.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/ADT/Statistic.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 9e1ff9ca35d7..47007e08a2ff 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -17,6 +17,7 @@
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCShuffler.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -29,7 +30,6 @@
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index aece36790486..b2c7f1569380 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -14,9 +14,9 @@
 
 #define DEBUG_TYPE "hexagon-shuffle"
 
+#include "MCTargetDesc/HexagonMCShuffler.h"
 #include "Hexagon.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "MCTargetDesc/HexagonMCShuffler.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index bb98c2bbef6d..1a361548f938 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "Hexagon.h"
 #include "HexagonTargetStreamer.h"
 #include "MCTargetDesc/HexagonInstPrinter.h"
 #include "MCTargetDesc/HexagonMCAsmInfo.h"
 #include "MCTargetDesc/HexagonMCELFStreamer.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCELFStreamer.h"
@@ -27,10 +28,9 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdint>
 #include <new>
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 9aa8ad68e07e..60a12dcf2f03 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -9,9 +9,9 @@
 //
 // RDF-based generic dead code elimination.
 
+#include "RDFDeadCode.h"
 #include "RDFGraph.h"
 #include "RDFLiveness.h"
-#include "RDFDeadCode.h"
 
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index 7a2895aa4e8c..8d1272370899 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -10,8 +10,8 @@
 // Target-independent, SSA-based data flow graph for register data flow (RDF).
 //
 #include "RDFGraph.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominanceFrontier.h"
 #include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index 9d8a3881797b..83e8968086d8 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -23,8 +23,8 @@
 // and Embedded Architectures and Compilers", 8 (4),
 // <10.1145/2086696.2086706>. <hal-00647369>
 //
-#include "RDFGraph.h"
 #include "RDFLiveness.h"
+#include "RDFGraph.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominanceFrontier.h"
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 1d6c07974beb..72e471f5766e 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -28,8 +28,8 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SMLoc.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
index 7475dbd68ae4..38e75108ba16 100644
--- a/lib/Target/Lanai/LanaiTargetObjectFile.cpp
+++ b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
@@ -10,13 +10,13 @@
 
 #include "LanaiSubtarget.h"
 #include "LanaiTargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index e02bba529bd5..64cd3342ac18 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -9,8 +9,8 @@
 
 #include "MCTargetDesc/LanaiBaseInfo.h"
 #include "MCTargetDesc/LanaiFixupKinds.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index 10254677a5ad..c3727416ecb9 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -19,8 +19,8 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index a47ff9ff3d61..bcbde2b8b794 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "LanaiMCAsmInfo.h"
 #include "LanaiMCTargetDesc.h"
 #include "InstPrinter/LanaiInstPrinter.h"
+#include "LanaiMCAsmInfo.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index abf062fe86ae..f39c21fc8aa2 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MSP430.h"
 #include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430.h"
 #include "MSP430InstrInfo.h"
 #include "MSP430MCInstLower.h"
 #include "MSP430TargetMachine.h"
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d855d3e7f778..694c201cbe8d 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -9,17 +9,18 @@
 
 #include "MCTargetDesc/MipsABIFlagsSection.h"
 #include "MCTargetDesc/MipsABIInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MCTargetDesc/MipsMCExpr.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "MipsTargetStreamer.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
 #include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -40,13 +41,12 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index ecdf6b0de6e7..b0b994323036 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -17,14 +17,14 @@
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdint>
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 1e2eb7dbec3e..6d3d4db03603 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 //
 
-#include "MCTargetDesc/MipsFixupKinds.h"
 #include "MCTargetDesc/MipsAsmBackend.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
 #include "MCTargetDesc/MipsMCExpr.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "llvm/MC/MCAsmBackend.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 1a1c613cfce0..d116ac3471bc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -10,13 +10,13 @@
 #include "MCTargetDesc/MipsFixupKinds.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index ae3278322311..f658aadff22f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -10,12 +10,12 @@
 #include "MipsELFStreamer.h"
 #include "MipsOptionRecord.h"
 #include "MipsTargetStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index a35eb2a8e03a..0330824fd614 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MipsMCCodeEmitter.h"
 #include "MCTargetDesc/MipsFixupKinds.h"
 #include "MCTargetDesc/MipsMCExpr.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsMCCodeEmitter.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index be04480044d4..aad6bf378ea0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -8,14 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 74d5e4cc9841..2d84528e7469 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MipsOptionRecord.h"
 #include "MipsABIInfo.h"
 #include "MipsELFStreamer.h"
-#include "MipsOptionRecord.h"
 #include "MipsTargetStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
 #include <cassert>
 
 using namespace llvm;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 2d4083b27ed1..0cd4aebe4d16 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -11,19 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/MipsABIInfo.h"
 #include "MipsTargetStreamer.h"
 #include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsABIInfo.h"
 #include "MipsELFStreamer.h"
 #include "MipsMCExpr.h"
 #include "MipsMCTargetDesc.h"
 #include "MipsTargetObjectFile.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 9615bc38bfce..f24761d7d101 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -185,6 +185,9 @@ def FeatureUseTCCInDIV : SubtargetFeature<
                                "UseTCCInDIV", "false",
                                "Force the assembler to use trapping">;
 
+def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true",
+                                    "Disable 4-operand madd.fmt and related instructions">;
+
 //===----------------------------------------------------------------------===//
 // Mips processors supported.
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index a222080f6b81..09e41e1423ae 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/MipsBaseInfo.h"
 #include "Mips16FrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "Mips16InstrInfo.h"
 #include "MipsInstrInfo.h"
 #include "MipsRegisterInfo.h"
@@ -25,10 +25,10 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/MachineLocation.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include <cassert>
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 9cdbf510737f..f7ff7c3dc7bb 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -12,17 +12,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MipsAsmPrinter.h"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
 #include "MCTargetDesc/MipsMCNaCl.h"
 #include "Mips.h"
-#include "MipsAsmPrinter.h"
 #include "MipsInstrInfo.h"
 #include "MipsMCInstLower.h"
 #include "MipsTargetMachine.h"
 #include "MipsTargetStreamer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,7 +44,6 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
index cb9f676c237a..6a03ee9927d7 100644
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -51,6 +51,22 @@ static bool originalTypeIsF128(const Type *Ty, const char *Func) {
   return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func));
 }
 
+/// Return true if the original type was vXfXX.
+static bool originalEVTTypeIsVectorFloat(EVT Ty) {
+  if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
+    return true;
+
+  return false;
+}
+
+/// Return true if the original IR type was a vector of floats (vXfXX).
+static bool originalTypeIsVectorFloat(const Type * Ty) {
+  if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
+    return true;
+
+  return false;
+}
+
 MipsCCState::SpecialCallingConvType
 MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
                                             const MipsSubtarget &Subtarget) {
@@ -78,8 +94,8 @@ void MipsCCState::PreAnalyzeCallResultForF128(
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
-/// this for use by RetCC_MipsN.
+/// Identify lowered values that originated from f128 or float arguments and
+/// record this for use by RetCC_MipsN.
 void MipsCCState::PreAnalyzeReturnForF128(
     const SmallVectorImpl<ISD::OutputArg> &Outs) {
   const MachineFunction &MF = getMachineFunction();
@@ -91,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF128(
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
+/// Identify lowered values that originated from vXfXX and record
 /// this.
+void MipsCCState::PreAnalyzeCallResultForVectorFloat(
+    const SmallVectorImpl<ISD::InputArg> &Ins, const Type *RetTy) {
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy));
+  }
+}
+
+/// Identify lowered values that originated from vXfXX arguments and record
+/// this.
+void MipsCCState::PreAnalyzeReturnForVectorFloat(
+    const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  for (unsigned i = 0; i < Outs.size(); ++i) {
+    ISD::OutputArg Out = Outs[i];
+    OriginalRetWasFloatVector.push_back(
+        originalEVTTypeIsVectorFloat(Out.ArgVT));
+  }
+}
+
+/// Identify lowered values that originated from f128, float and sret to vXfXX
+/// arguments and record this.
 void MipsCCState::PreAnalyzeCallOperands(
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     std::vector<TargetLowering::ArgListEntry> &FuncArgs,
     const char *Func) {
   for (unsigned i = 0; i < Outs.size(); ++i) {
-    OriginalArgWasF128.push_back(
-        originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func));
-    OriginalArgWasFloat.push_back(
-        FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+    TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex];
+
+    OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func));
+    OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy());
+    OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy());
     CallOperandIsFixed.push_back(Outs[i].IsFixed);
   }
 }
 
-/// Identify lowered values that originated from f128 arguments and record
-/// this.
+/// Identify lowered values that originated from f128, float and vXfXX arguments
+/// and record this.
 void MipsCCState::PreAnalyzeFormalArgumentsForF128(
     const SmallVectorImpl<ISD::InputArg> &Ins) {
   const MachineFunction &MF = getMachineFunction();
@@ -120,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
     if (Ins[i].Flags.isSRet()) {
       OriginalArgWasF128.push_back(false);
       OriginalArgWasFloat.push_back(false);
+      OriginalArgWasFloatVector.push_back(false);
       continue;
     }
 
@@ -129,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
     OriginalArgWasF128.push_back(
         originalTypeIsF128(FuncArg->getType(), nullptr));
     OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+
+    // The MIPS vector ABI has a quirk: if the first argument is actually an
+    // SRet pointer to a vector, then the next argument slot is $a2. Record
+    // whether this argument's original type was a vector.
+    OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy());
   }
 }
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 77ecc65b2eee..27901699480b 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -45,16 +45,33 @@ private:
                          const char *Func);
 
   /// Identify lowered values that originated from f128 arguments and record
-  /// this.
+  /// this for use by RetCC_MipsN.
   void
   PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
 
+  void
+  PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl<ISD::InputArg> &Ins,
+                                     const Type *RetTy);
+
+  void PreAnalyzeFormalArgumentsForVectorFloat(
+      const SmallVectorImpl<ISD::InputArg> &Ins);
+
+  void
+  PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
   /// Records whether the value has been lowered from an f128.
   SmallVector<bool, 4> OriginalArgWasF128;
 
   /// Records whether the value has been lowered from float.
   SmallVector<bool, 4> OriginalArgWasFloat;
 
+  /// Records whether the value has been lowered from a floating point vector.
+  SmallVector<bool, 4> OriginalArgWasFloatVector;
+
+  /// Records whether the return value has been lowered from a floating point
+  /// vector.
+  SmallVector<bool, 4> OriginalRetWasFloatVector;
+
   /// Records whether the value was a fixed argument.
   /// See ISD::OutputArg::IsFixed,
   SmallVector<bool, 4> CallOperandIsFixed;
@@ -78,6 +95,7 @@ public:
     CCState::AnalyzeCallOperands(Outs, Fn);
     OriginalArgWasF128.clear();
     OriginalArgWasFloat.clear();
+    OriginalArgWasFloatVector.clear();
     CallOperandIsFixed.clear();
   }
 
@@ -96,31 +114,38 @@ public:
     CCState::AnalyzeFormalArguments(Ins, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
                          CCAssignFn Fn, const Type *RetTy,
                          const char *Func) {
     PreAnalyzeCallResultForF128(Ins, RetTy, Func);
+    PreAnalyzeCallResultForVectorFloat(Ins, RetTy);
     CCState::AnalyzeCallResult(Ins, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
                      CCAssignFn Fn) {
     PreAnalyzeReturnForF128(Outs);
+    PreAnalyzeReturnForVectorFloat(Outs);
     CCState::AnalyzeReturn(Outs, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
   }
 
   bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
                    CCAssignFn Fn) {
     PreAnalyzeReturnForF128(ArgsFlags);
+    PreAnalyzeReturnForVectorFloat(ArgsFlags);
     bool Return = CCState::CheckReturn(ArgsFlags, Fn);
     OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
+    OriginalArgWasFloatVector.clear();
     return Return;
   }
 
@@ -128,6 +153,12 @@ public:
   bool WasOriginalArgFloat(unsigned ValNo) {
       return OriginalArgWasFloat[ValNo];
   }
+  bool WasOriginalArgVectorFloat(unsigned ValNo) const {
+    return OriginalArgWasFloatVector[ValNo];
+  }
+  bool WasOriginalRetVectorFloat(unsigned ValNo) const {
+    return OriginalRetWasFloatVector[ValNo];
+  }
   bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
   SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
 };
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index a57cb7badc17..b5df78f89a6b 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -37,6 +37,10 @@ class CCIfOrigArgWasF128<CCAction A>
 class CCIfArgIsVarArg<CCAction A>
     : CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;
 
+/// Match if the original return type was not a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+    : CCIf<"!static_cast<MipsCCState *>(&State)"
+                "->WasOriginalRetVectorFloat(ValNo)", A>;
 
 /// Match if the special calling conv is the specified value.
 class CCIfSpecialCallingConv<string CC, CCAction A>
@@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[
   // Promote i1/i8/i16 return values to i32.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
 
-  // i32 are returned in registers V0, V1, A0, A1
-  CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+  // i32 are returned in registers V0, V1, A0, A1, unless the original return
+  // type was a vector of floats.
+  CCIfOrigArgWasNotVectorFloat<CCIfType<[i32],
+                                        CCAssignToReg<[V0, V1, A0, A1]>>>,
 
   // f32 are returned in registers F0, F2
   CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 026f66a1c0e1..ff43a3950610 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -24,10 +24,10 @@
 #include "Mips16InstrInfo.h"
 #include "MipsMachineFunction.h"
 #include "MipsSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index ac9a81b1bb2f..c238a65378e2 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -19,6 +19,7 @@ def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
 def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
 def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
 def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
+def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
 
 // Mips-specific dsp nodes
 def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
@@ -851,8 +852,8 @@ class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
 class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, uimm8,
                                     immZExt8, NoItinerary, DSPROpnd>;
 
-class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, uimm10,
-                                    immZExt10, NoItinerary, DSPROpnd>;
+class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, simm10,
+                                    immSExt10, NoItinerary, DSPROpnd>;
 
 class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
                                              NoItinerary, DSPROpnd, GPR32Opnd>;
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index b83f44a74d5b..f79cb0e67200 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -17,8 +17,8 @@
 #include "MCTargetDesc/MipsABIInfo.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsCCState.h"
-#include "MipsInstrInfo.h"
 #include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
 #include "MipsMachineFunction.h"
 #include "MipsSubtarget.h"
 #include "MipsTargetMachine.h"
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 760630c41176..f2193013b7aa 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -22,12 +22,12 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/CallingConv.h"
@@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
   return true;
 }
 
+// The MIPS MSA ABI passes vector arguments in the integer register set.
+// The number of integer registers used is dependent on the ABI used.
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+  if (VT.isVector() && Subtarget.hasMSA())
+    return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64;
+  return MipsTargetLowering::getRegisterType(VT);
+}
+
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+                                                      EVT VT) const {
+  if (VT.isVector()) {
+      if (Subtarget.isABI_O32()) {
+        return MVT::i32;
+      } else {
+        return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64;
+      }
+  }
+  return MipsTargetLowering::getRegisterType(Context, VT);
+}
+
+unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+                                                           EVT VT) const {
+  if (VT.isVector())
+    return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
+                    1U);
+  return MipsTargetLowering::getNumRegisters(Context, VT);
+}
+
+unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
+    LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+    unsigned &NumIntermediates, MVT &RegisterVT) const {
+
+  // Break down vector types to either 2 i64s or 4 i32s.
+  RegisterVT = getRegisterTypeForCallingConv(Context, VT) ;
+  IntermediateVT = RegisterVT;
+  NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
+                         ? VT.getVectorNumElements()
+                         : VT.getSizeInBits() / RegisterVT.getSizeInBits();
+
+  return NumIntermediates;
+}
+
 SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
   MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
   return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
@@ -470,8 +512,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                      !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() &&
                      !Subtarget.inMicroMipsMode();
 
-  // Disable if we don't generate PIC or the ABI isn't O32.
-  if (!TM.isPositionIndependent() || !TM.getABI().IsO32())
+  // Disable if any of the following is true:
+  // We do not generate PIC, the ABI is not O32, LargeGOT is being used.
+  if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT)
     UseFastISel = false;
 
   return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr;
@@ -2551,6 +2594,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
 //       yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
 //       not used, it must be shadowed. If only A3 is available, shadow it and
 //       go to stack.
+// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack.
+// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3}
+//         with the remainder spilled to the stack.
+// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases
+//         spilling the remainder to the stack.
 //
 //  For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
 //===----------------------------------------------------------------------===//
@@ -2562,8 +2610,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
       State.getMachineFunction().getSubtarget());
 
   static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+
+  const MipsCCState * MipsState = static_cast<MipsCCState *>(&State);
+
   static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };
 
+  static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 };
+
   // Do not process byval args here.
   if (ArgFlags.isByVal())
     return true;
@@ -2601,8 +2654,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
                                 State.getFirstUnallocated(F32Regs) != ValNo;
   unsigned OrigAlign = ArgFlags.getOrigAlign();
   bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
+  bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo);
 
-  if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+  // The MIPS vector ABI for floats passes them in a pair of registers
+  if (ValVT == MVT::i32 && isVectorFloat) {
+    // This is the start of a vector that was scalarized into an unknown number
+    // of components. It doesn't matter how many there are. Allocate one of the
+    // notional 8 byte aligned registers which map onto the argument stack, and
+    // shadow the register lost to alignment requirements.
+    if (ArgFlags.isSplit()) {
+      Reg = State.AllocateReg(FloatVectorIntRegs);
+      if (Reg == Mips::A2)
+        State.AllocateReg(Mips::A1);
+      else if (Reg == 0)
+        State.AllocateReg(Mips::A3);
+    } else {
+      // If we're an intermediate component of the split, we can just attempt to
+      // allocate a register directly.
+      Reg = State.AllocateReg(IntRegs);
+    }
+  } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
     Reg = State.AllocateReg(IntRegs);
     // If this is the first part of an i64 arg,
     // the allocated register must be either A0 or A2.
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 2dcafd51061a..0e47ed38f420 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -248,6 +248,33 @@ namespace llvm {
     bool isCheapToSpeculateCttz() const override;
     bool isCheapToSpeculateCtlz() const override;
 
+    /// Return the register type for a given MVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(MVT VT) const override;
+
+    /// Return the register type for a given EVT, ensuring vectors are treated
+    /// as a series of gpr sized integers.
+    virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+                                              EVT VT) const override;
+
+    /// Return the number of registers for a given EVT, ensuring vectors are
+    /// treated as a series of gpr sized integers.
+    virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+                                                   EVT VT) const override;
+
+    /// Break down vectors to the correct number of gpr sized integers.
+    virtual unsigned getVectorTypeBreakdownForCallingConv(
+        LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+        unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+    /// Return the correct alignment for the current calling convention.
+    virtual unsigned
+    getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override {
+      if (ArgTy->isVectorTy())
+        return std::min(DL.getABITypeAlignment(ArgTy), 8U);
+      return DL.getABITypeAlignment(ArgTy);
+    }
+
     ISD::NodeType getExtendForAtomicOps() const override {
       return ISD::SIGN_EXTEND;
     }
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index d81a769d7fd9..94f3a74be98b 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -557,11 +557,11 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
 defm FSUB :  ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
 
 def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
-             MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
+             MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4;
 def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
-             MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
+             MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4;
 
-let AdditionalPredicates = [NoNaNsFPMath] in {
+let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in {
   def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
                 MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
   def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
@@ -569,11 +569,11 @@ let AdditionalPredicates = [NoNaNsFPMath] in {
 }
 
 def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
-               MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
+               MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4;
 def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
-               MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
+               MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4;
 
-let AdditionalPredicates = [NoNaNsFPMath] in {
+let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in {
   def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
                   MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
   def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
@@ -582,12 +582,12 @@ let AdditionalPredicates = [NoNaNsFPMath] in {
 
 let DecoderNamespace = "Mips64" in {
   def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
-                 MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
+                 MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4;
   def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
-                 MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
+                 MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4;
 }
 
-let AdditionalPredicates = [NoNaNsFPMath],
+let AdditionalPredicates = [NoNaNsFPMath, HasMadd4],
     DecoderNamespace = "Mips64" in {
   def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
                   MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 8761946b8dbb..40078fb77144 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -238,6 +238,8 @@ def HasEVA       :    Predicate<"Subtarget->hasEVA()">,
                       AssemblerPredicate<"FeatureEVA,FeatureMips32r2">;
 def HasMSA : Predicate<"Subtarget->hasMSA()">,
              AssemblerPredicate<"FeatureMSA">;
+def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">,
+               AssemblerPredicate<"!FeatureMadd4">;
 
 
 //===----------------------------------------------------------------------===//
@@ -390,6 +392,10 @@ class ASE_NOT_DSP {
   list<Predicate> InsnPredicates = [NotDSP];
 }
 
+class MADD4 {
+  list<Predicate> AdditionalPredicates = [HasMadd4];
+}
+
 //===----------------------------------------------------------------------===//
 
 class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 63034ecab93b..e01c03db2227 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/MipsABIInfo.h"
 #include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsABIInfo.h"
 #include "MipsSubtarget.h"
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -40,7 +40,11 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
   const TargetRegisterClass *RC =
       STI.inMips16Mode()
           ? &Mips::CPU16RegsRegClass
-          : static_cast<const MipsTargetMachine &>(MF.getTarget())
+          : STI.inMicroMipsMode()
+                ? STI.hasMips64()
+                      ? &Mips::GPRMM16_64RegClass
+                      : &Mips::GPRMM16RegClass
+                : static_cast<const MipsTargetMachine &>(MF.getTarget())
                           .getABI()
                           .IsN64()
                       ? &Mips::GPR64RegClass
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index 94a1965f9ffb..79c8395d9dcc 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Mips.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips.h"
 #include "MipsMachineFunction.h"
 #include "MipsTargetMachine.h"
 #include "llvm/ADT/ScopedHashTable.h"
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 70ead5cde6fa..7ee45c28a7d0 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Instructions.h"
 #include "Mips.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 65be350f259d..de3389b5a6bf 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
 
   DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
                << "spOffset   : " << spOffset << "\n"
-               << "stackSize  : " << stackSize << "\n");
+               << "stackSize  : " << stackSize << "\n"
+               << "alignment  : "
+               << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n");
 
   eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
 }
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index e765b4625206..102ebb21609a 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MipsSEFrameLowering.h"
 #include "MCTargetDesc/MipsABIInfo.h"
 #include "MipsMachineFunction.h"
 #include "MipsRegisterInfo.h"
-#include "MipsSEFrameLowering.h"
 #include "MipsSEInstrInfo.h"
 #include "MipsSubtarget.h"
 #include "llvm/ADT/BitVector.h"
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index c9cf9363b8c9..49ae6dd4cd39 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -24,11 +24,11 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
-#include "llvm/IR/Dominators.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 1f4e933db2a2..154d5825427b 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MipsMachineFunction.h"
-#include "Mips.h"
-#include "MipsRegisterInfo.h"
 #include "MipsSubtarget.h"
+#include "Mips.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
 #include "MipsTargetMachine.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
@@ -70,7 +70,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
       InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
       HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
       Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
-      HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(),
+      HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(),
       InstrInfo(
           MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
       FrameLowering(MipsFrameLowering::create(*this)),
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index b4d15ee361ff..625a652a0ca0 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -144,6 +144,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
 
   // HasEVA -- supports EVA ASE.
   bool HasEVA;
+ 
+  // DisableMadd4 (nomadd4) -- disables generation of 4-operand madd.s, madd.d
+  // and related instructions.
+  bool DisableMadd4;
 
   InstrItineraryData InstrItins;
 
@@ -253,6 +257,7 @@ public:
   bool hasDSPR2() const { return HasDSPR2; }
   bool hasDSPR3() const { return HasDSPR3; }
   bool hasMSA() const { return HasMSA; }
+  bool disableMadd4() const { return DisableMadd4; }
   bool hasEVA() const { return HasEVA; }
   bool useSmallSection() const { return UseSmallSection; }
 
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index a9d6ab055892..330ae19ecd0f 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MipsTargetMachine.h"
 #include "MCTargetDesc/MipsABIInfo.h"
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "Mips.h"
@@ -18,7 +19,6 @@
 #include "MipsSEISelDAGToDAG.h"
 #include "MipsSubtarget.h"
 #include "MipsTargetObjectFile.h"
-#include "MipsTargetMachine.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index c5d6a05d6611..4d73c3991035 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -10,13 +10,13 @@
 #include "MipsTargetObjectFile.h"
 #include "MipsSubtarget.h"
 #include "MipsTargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 58cb7793d040..0139646fc3f7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -12,11 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "NVPTXAsmPrinter.h"
 #include "InstPrinter/NVPTXInstPrinter.h"
 #include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
 #include "NVPTX.h"
-#include "NVPTXAsmPrinter.h"
 #include "NVPTXMCExpr.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXRegisterInfo.h"
@@ -73,8 +73,8 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 390776212ce7..916b0e115664 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "NVPTX.h"
 #include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTX.h"
 #include "NVPTXUtilities.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/Constants.h"
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ebaaf42bc64e..f26b9a7cb8dd 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "NVPTXISelLowering.h"
 #include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTX.h"
-#include "NVPTXISelLowering.h"
 #include "NVPTXSection.h"
 #include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 0f6c2e53e60a..da563f0531d4 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "NVPTX.h"
 #include "NVPTXInstrInfo.h"
+#include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index e858b37e1843..139dc7fbeeda 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -90,8 +90,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTX.h"
-#include "NVPTXUtilities.h"
 #include "NVPTXTargetMachine.h"
+#include "NVPTXUtilities.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index e10b046f7c97..4e902c0fb507 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -36,8 +36,8 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 8dfbfece9b8e..2b6ba8c85d4d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "NVPTXTargetMachine.h"
 #include "NVPTX.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXLowerAggrCopies.h"
-#include "NVPTXTargetMachine.h"
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXTargetTransformInfo.h"
 #include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp
index 9c71a2ee165b..11277f5ba596 100644
--- a/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -15,8 +15,8 @@
 #include "NVPTX.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 84bb9ec56800..baf5902ddf58 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCInstPrinter.h"
-#include "PPCInstrInfo.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "PPCInstrInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 4863ac542736..028c2cb562f8 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,8 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCELFObjectWriter.h"
@@ -18,9 +20,7 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index fd279c60f3f5..1488bd5b0be6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
 #include "MCTargetDesc/PPCMCExpr.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 6b97d4c1456b..54f664314578 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPCFixupKinds.h"
 #include "PPCMCExpr.h"
+#include "PPCFixupKinds.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 2d686f227919..e8f220ea5457 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -11,12 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "InstPrinter/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCAsmInfo.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "PPCTargetStreamer.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
@@ -30,11 +31,10 @@
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 1f38a8c947e7..6d591ca964a6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -7,9 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -18,7 +19,6 @@
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
 
 using namespace llvm;
 
@@ -151,7 +151,7 @@ static void makeRelocationInfo(MachO::any_relocation_info &MRE,
   // The bitfield offsets that work (as determined by trial-and-error)
   // are different than what is documented in the mach-o manuals.
   // This appears to be an endianness issue; reversing the order of the
-  // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+  // documented bitfields in <llvm/BinaryFormat/MachO.h> fixes this (but
   // breaks x86/ARM assembly).
   MRE.r_word1 = ((Index << 8) |    // was << 0
                  (IsPCRel << 7) |  // was << 24
@@ -222,7 +222,7 @@ bool PPCMachObjectWriter::recordScatteredRelocation(
       report_fatal_error("symbol '" + B->getSymbol().getName() +
                          "' can not be undefined in a subtraction expression");
 
-    // FIXME: is Type correct? see include/llvm/Support/MachO.h
+    // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h
     Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
     FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
   }
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1f181d007f63..841b8c514464 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -16,11 +16,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
-#include "PPCInstrInfo.h"
 #include "InstPrinter/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCMCExpr.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetMachine.h"
@@ -29,6 +29,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -55,11 +57,9 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 #include <cassert>
diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 93c201d03869..55e105dad0e5 100644
--- a/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements converting i1 values to i32 if they could be more
+// This file implements converting i1 values to i32/i64 if they could be more
 // profitably allocated as GPRs rather than CRs. This pass will become totally
 // unnecessary if Register Bank Allocation and Global Instruction Selection ever
 // go upstream.
 //
-// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the
 // transitive closure of their uses includes only PHINodes, CallInsts, and
 // ReturnInsts. The rational is that arguments are generally passed and returned
-// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
+// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will
 // actually save casts at the Machine Instruction level.
 //
 // It might be useful to expand this pass to add bit-wise operations to the list
@@ -33,11 +33,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "PPCTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
@@ -50,8 +51,9 @@
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
 #include "llvm/Pass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Casting.h"
 #include <cassert>
 
 using namespace llvm;
@@ -87,17 +89,19 @@ class PPCBoolRetToInt : public FunctionPass {
     return Defs;
   }
 
-  // Translate a i1 value to an equivalent i32 value:
-  static Value *translate(Value *V) {
-    Type *Int32Ty = Type::getInt32Ty(V->getContext());
+  // Translate a i1 value to an equivalent i32/i64 value:
+  Value *translate(Value *V) {
+    Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
+                                : Type::getInt32Ty(V->getContext());
+
     if (auto *C = dyn_cast<Constant>(V))
-      return ConstantExpr::getZExt(C, Int32Ty);
+      return ConstantExpr::getZExt(C, IntTy);
     if (auto *P = dyn_cast<PHINode>(V)) {
       // Temporarily set the operands to 0. We'll fix this later in
       // runOnUse.
-      Value *Zero = Constant::getNullValue(Int32Ty);
+      Value *Zero = Constant::getNullValue(IntTy);
       PHINode *Q =
-        PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+        PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P);
       for (unsigned i = 0; i < P->getNumOperands(); ++i)
         Q->addIncoming(Zero, P->getIncomingBlock(i));
       return Q;
@@ -109,7 +113,7 @@ class PPCBoolRetToInt : public FunctionPass {
 
     auto InstPt =
       A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
-    return new ZExtInst(V, Int32Ty, "", InstPt);
+    return new ZExtInst(V, IntTy, "", InstPt);
   }
 
   typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
@@ -185,6 +189,13 @@ class PPCBoolRetToInt : public FunctionPass {
     if (skipFunction(F))
       return false;
 
+    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+    if (!TPC)
+      return false;
+
+    auto &TM = TPC->getTM<PPCTargetMachine>();
+    ST = TM.getSubtargetImpl(F);
+
     PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
     B2IMap Bool2IntMap;
     bool Changed = false;
@@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass {
     return Changed;
   }
 
-  static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+  bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
                        B2IMap &BoolToIntMap) {
     auto Defs = findAllDefs(U);
 
@@ -262,13 +273,16 @@ class PPCBoolRetToInt : public FunctionPass {
     AU.addPreserved<DominatorTreeWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
   }
+
+private:
+  const PPCSubtarget *ST;
 };
 
 } // end anonymous namespace
 
 char PPCBoolRetToInt::ID = 0;
 INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
-                "Convert i1 constants to i32 if they are returned",
+                "Convert i1 constants to i32/i64 if they are returned",
                 false, false)
 
 FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index b7d3154d0000..d0b66f9bca09 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -15,8 +15,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "PPCSubtarget.h"
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 70c4170653ae..24bc027f8106 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -23,7 +23,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "PPC.h"
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/STLExtras.h"
@@ -43,6 +42,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index 6bd229625fc3..811e4dd9dfe1 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "PPCMachineFunctionInfo.h"
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 2fc8654deeab..bc9957194f6d 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -13,10 +13,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
 #include "MCTargetDesc/PPCPredicates.h"
-#include "PPCCallingConv.h"
+#include "PPC.h"
 #include "PPCCCState.h"
+#include "PPCCallingConv.h"
 #include "PPCISelLowering.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCSubtarget.h"
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 54414457388d..28d496ee9ca1 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -21,9 +21,10 @@
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -54,7 +55,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/Statistic.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -2824,6 +2824,20 @@ SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
     return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                           ShiftOps), 0);
   }
+  case ISD::SETNE: {
+    // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
+    // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+    SDValue Clz =
+      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+    SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl),
+      getI32Imm(31, dl) };
+    SDValue Shift =
+      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+                                          getI32Imm(1, dl)), 0);
+  }
   }
 }
 
@@ -2850,6 +2864,27 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
     return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi,
                                           getI32Imm(63, dl)), 0);
   }
+  case ISD::SETNE: {
+    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
+    // flip the bit, finally take 2's complement.
+    // (sext (setcc %a, %b, setne)) ->
+    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
+    // Same as above, but the first xor is not needed.
+    // (sext (setcc %a, 0, setne)) ->
+    //   (neg (xor (lshr (ctlz %a), 5), 1))
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+    SDValue Clz =
+      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+    SDValue ShiftOps[] =
+      { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) };
+    SDValue Shift =
+      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+    SDValue Xori =
+      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+                                     getI32Imm(1, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
+  }
   }
 }
 
@@ -2872,6 +2907,19 @@ SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                           getI64Imm(58, dl), getI64Imm(63, dl)),
                    0);
   }
+  case ISD::SETNE: {
+    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
+    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, (xor %a, %b), addc.CA)
+    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
+    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, %a, addcz.CA)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue AC =
+      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
+                                     Xor, getI32Imm(~0U, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
+                                          Xor, AC.getValue(1)), 0);
+  }
   }
 }
 
@@ -2896,6 +2944,19 @@ SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS,
     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
                                           Addic, Addic.getValue(1)), 0);
   }
+  case ISD::SETNE: {
+    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
+    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
+    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
+    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue SC =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
+                                     Xor, getI32Imm(0, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
+                                          SC, SC.getValue(1)), 0);
+  }
   }
 }
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 41ff9d903aa0..bda4e5e81734 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "PPCISelLowering.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
-#include "PPCCallingConv.h"
 #include "PPCCCState.h"
+#include "PPCCallingConv.h"
 #include "PPCFrameLowering.h"
 #include "PPCInstrInfo.h"
-#include "PPCISelLowering.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCPerfectShuffle.h"
 #include "PPCRegisterInfo.h"
@@ -28,11 +28,11 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -52,8 +52,8 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -61,9 +61,9 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index c4139ca8b7bd..e214d26c063b 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2717,6 +2717,40 @@ def DblToFlt {
   dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
   dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
 }
+
+def ByteToWord { // i32 sign-extensions of bytes 0, 4, 8, 12 of v16i8 $A
+  dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+  dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+  dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+  dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+}
+
+def ByteToDWord { // i64 sign-extensions of bytes 0 and 8 of v16i8 $A
+  dag A0 = (i64 (sext_inreg
+            (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+  dag A1 = (i64 (sext_inreg
+            (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+}
+
+def HWordToWord { // i32 sign-extensions of halfwords 0, 2, 4, 6 of v8i16 $A
+  dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+  dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+  dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+  dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+}
+
+def HWordToDWord { // i64 sign-extensions of halfwords 0 and 4 of v8i16 $A
+  dag A0 = (i64 (sext_inreg
+            (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+  dag A1 = (i64 (sext_inreg
+            (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+}
+
+def WordToDWord { // i64 sign-extensions of words 0 and 2 of v4i32 $A
+  dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+  dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+}
+
 def FltToIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
 }
@@ -2969,4 +3003,21 @@ let AddedComplexity = 400 in {
               (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
                       (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
   }
+  // P9 Altivec instructions that can be used to build vectors.
+  // Adding them to PPCInstrVSX.td rather than PPCInstrAltivec.td to compete
+  // with complexities of existing build vector patterns in this file.
+  let Predicates = [HasP9Altivec] in { // NOTE(review): not endian-gated; verify
+    def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)),
+              (v2i64 (VEXTSW2D $A))>;
+    def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)),
+              (v2i64 (VEXTSH2D $A))>;
+    def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1,
+                      HWordToWord.A2, HWordToWord.A3)),
+              (v4i32 (VEXTSH2W $A))>;
+    def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1,
+                      ByteToWord.A2, ByteToWord.A3)),
+              (v4i32 (VEXTSB2W $A))>;
+    def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)),
+              (v2i64 (VEXTSB2D $A))>;
+  }
 }
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 541b98e01b99..b310493587ae 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
 #include "MCTargetDesc/PPCMCExpr.h"
+#include "PPC.h"
 #include "PPCSubtarget.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index c6d2c3ebcc0f..ff5f17c7628f 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -19,9 +19,9 @@
 //
 //===---------------------------------------------------------------------===//
 
-#include "PPCInstrInfo.h"
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index c7aa4cb78b7a..31c50785c2ee 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -21,9 +21,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPCInstrInfo.h"
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 7c53a5601790..17345b6ca8d3 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -61,8 +61,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
 #include "PPCMachineFunctionInfo.h"
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b9004cc8a9f5..5a226b23ff96 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "PPC.h"
 #include "PPCSubtarget.h"
 #include "PPCTargetObjectFile.h"
-#include "PPCTargetMachine.h"
 #include "PPCTargetTransformInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index f3a0290da054..93fe3230ab81 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
 #include "PPCHazardRecognizers.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index f6d20ced15a0..a57484e5abdf 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -12,10 +12,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPCInstrInfo.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index d3434b77be8a..491eaf326a50 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -42,9 +42,9 @@
 //
 //===---------------------------------------------------------------------===//
 
-#include "PPCInstrInfo.h"
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
 #include "PPCTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/EquivalenceClasses.h"
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index d6f2672271e9..d9a71893afee 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -12,10 +12,10 @@
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index b2ed13758d41..9309d493cef4 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -13,13 +13,13 @@
 
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index ddc3bf350452..7c98b1c8f321 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H
 #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H
 
+#include "llvm/Config/config.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Config/config.h"
 
 namespace llvm {
 class MCAsmBackend;
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index efdde04c582d..744d7b8aaa3a 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -13,10 +13,10 @@
 
 #include "RISCVTargetMachine.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/LegacyPassManager.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 7e6dff6b7894..087c037614a9 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -9,8 +9,8 @@
 
 #include "MCTargetDesc/SparcMCExpr.h"
 #include "MCTargetDesc/SparcMCTargetDesc.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCContext.h"
@@ -28,8 +28,8 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SMLoc.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index da7e0b737e78..8e298e8316da 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -14,11 +14,11 @@
 #include "Sparc.h"
 #include "SparcRegisterInfo.h"
 #include "SparcSubtarget.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/TargetRegistry.h"
 
 using namespace llvm;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index cc07547ede2c..d1d1334163a2 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCAsmBackend.h"
 #include "MCTargetDesc/SparcFixupKinds.h"
 #include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixupKindInfo.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 21df60237d96..50e8825b15e8 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -14,10 +14,10 @@
 #include "SparcMCAsmInfo.h"
 #include "SparcMCExpr.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/Dwarf.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index e85a8cd5e339..a77f760d9eff 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -19,7 +19,6 @@
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/Object/ELF.h"
 
-
 using namespace llvm;
 
 #define DEBUG_TYPE "sparcmcexpr"
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 31a128a5f271..19fb94534b25 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Sparc.h"
 #include "InstPrinter/SparcInstPrinter.h"
 #include "MCTargetDesc/SparcMCExpr.h"
+#include "Sparc.h"
 #include "SparcInstrInfo.h"
 #include "SparcTargetMachine.h"
 #include "SparcTargetStreamer.h"
diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp
index a3cedcbf9dd1..a784124ff688 100644
--- a/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Sparc.h"
 #include "MCTargetDesc/SparcMCExpr.h"
+#include "Sparc.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 49c67e0819f7..c7a1ca262d2c 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -11,9 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcTargetMachine.h"
-#include "SparcTargetObjectFile.h"
-#include "Sparc.h"
 #include "LeonPasses.h"
+#include "Sparc.h"
+#include "SparcTargetObjectFile.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/LegacyPassManager.h"
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp
index 8fdde15d8d27..627e49a95f3c 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -9,8 +9,8 @@
 
 #include "SparcTargetObjectFile.h"
 #include "MCTargetDesc/SparcMCExpr.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Target/TargetLowering.h"
 
 using namespace llvm;
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index efcf6696fd50..ad05779a9f64 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 23b7d5b5d501..fd1fd7bc40dc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixupKindInfo.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 3de570bf30cc..df0a8161e6e7 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -9,11 +9,11 @@
 
 #include "MCTargetDesc/SystemZMCFixups.h"
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 8fa54ee434cf..0c755c9ad1b9 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -25,10 +25,10 @@
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
 
 #include "SystemZSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index ae141dbcad34..ac4c3f6db684 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -5367,12 +5367,24 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
   if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
     if (Invert)
       CCMask ^= CCValid;
+
+    // ISel pattern matching also adds a load memory operand of the same
+    // address, so take special care to find the storing memory operand.
+    MachineMemOperand *MMO = nullptr;
+    for (auto *I : MI.memoperands())
+      if (I->isStore()) {
+          MMO = I;
+          break;
+        }
+
     BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
-        .addReg(SrcReg)
-        .add(Base)
-        .addImm(Disp)
-        .addImm(CCValid)
-        .addImm(CCMask);
+      .addReg(SrcReg)
+      .add(Base)
+      .addImm(Disp)
+      .addImm(CCValid)
+      .addImm(CCMask)
+      .addMemOperand(MMO);
+
     MI.eraseFromParent();
     return MBB;
   }
@@ -5950,7 +5962,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
         .addImm(DestDisp)
         .addImm(ThisLength)
         .add(SrcBase)
-        .addImm(SrcDisp);
+        .addImm(SrcDisp)
+        ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
     DestDisp += ThisLength;
     SrcDisp += ThisLength;
     Length -= ThisLength;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index b34c181124de..66a5ff12be46 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SystemZInstrInfo.h"
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "SystemZ.h"
 #include "SystemZInstrBuilder.h"
-#include "SystemZInstrInfo.h"
 #include "SystemZSubtarget.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index ec8ce6e911fa..3a0e01da42f0 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SystemZTargetMachine.h"
 #include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 6ef8000d6f43..d14a0fb0b0b2 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SystemZInstrInfo.h"
 #include "SystemZRegisterInfo.h"
+#include "SystemZInstrInfo.h"
 #include "SystemZSubtarget.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 263aff8b7bfb..7391df8342ef 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -14,9 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 36e51921bf2f..be480f03c572 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -19,8 +19,8 @@
 #include "SystemZInstrInfo.h"
 #include "SystemZRegisterInfo.h"
 #include "SystemZSelectionDAGInfo.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
index 96a9ef82c125..5dbd23d420a3 100644
--- a/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -47,10 +47,10 @@
 #include "SystemZ.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include <deque>
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index f30d52f859d7..cb81c0e5276e 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,14 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SystemZTargetMachine.h"
 #include "MCTargetDesc/SystemZMCTargetDesc.h"
 #include "SystemZ.h"
 #include "SystemZMachineScheduler.h"
-#include "SystemZTargetMachine.h"
 #include "SystemZTargetTransformInfo.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 5d1616d03779..42d92622d6c8 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -14,12 +14,12 @@
 
 #include "llvm-c/Target.h"
 #include "llvm-c/Initialization.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include <cstring>
 
 using namespace llvm;
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 91cc97e38b3d..f941891f3183 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -24,7 +25,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
@@ -240,6 +240,20 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal(
   if (GO->hasSection())
     return getExplicitSectionGlobal(GO, Kind, TM);
 
+  if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
+    auto Attrs = GVar->getAttributes();
+    if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) ||
+        (Attrs.hasAttribute("data-section") && Kind.isData()) ||
+        (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()))  {
+       return getExplicitSectionGlobal(GO, Kind, TM);
+    }
+  }
+
+  if (auto *F = dyn_cast<Function>(GO)) {
+    if (F->hasFnAttribute("implicit-section-name"))
+      return getExplicitSectionGlobal(GO, Kind, TM);
+  }
+
   // Use default section depending on the 'type' of global
   return SelectSectionForGlobal(GO, Kind, TM);
 }
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index df12e0e88e3b..01f14939864f 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-c/TargetMachine.h"
 #include "llvm-c/Core.h"
 #include "llvm-c/Target.h"
+#include "llvm-c/TargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/CodeGenCWrappers.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index b5f53114d3e1..9be11da9afac 100644
--- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -15,8 +15,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index c6158720d62f..b1de84d7e8e6 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -16,9 +16,9 @@
 #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/Wasm.h"
 
 namespace llvm {
 
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 7c78285fbda4..4f20096c1583 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -12,8 +12,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCDirectives.h"
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 544cd653fd72..c56c591def36 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -12,8 +12,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 795658ca96b4..0ba700a86b74 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -15,9 +15,9 @@
 #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
 #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
 
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Wasm.h"
 
 namespace llvm {
 
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 68d6747298df..ddf964e7dbb7 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -16,9 +16,9 @@
 #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
 #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
 
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Wasm.h"
 
 namespace llvm {
 
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 2846ec5e9337..27c01cb8acf7 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -13,14 +13,14 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/BinaryFormat/Wasm.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCSymbolWasm.h"
 #include "llvm/MC/MCWasmObjectWriter.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Wasm.h"
 using namespace llvm;
 
 namespace {
@@ -54,6 +54,11 @@ static bool IsFunctionExpr(const MCExpr *Expr) {
   return false;
 }
 
+static bool IsFunctionType(const MCValue &Target) {
+  const MCSymbolRefExpr *RefA = Target.getSymA();
+  return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX;
+}
+
 unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx,
                                                    const MCValue &Target,
                                                    const MCFixup &Fixup,
@@ -71,6 +76,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx,
   case WebAssembly::fixup_code_sleb128_i64:
     llvm_unreachable("fixup_sleb128_i64 not implemented yet");
   case WebAssembly::fixup_code_uleb128_i32:
+    if (IsFunctionType(Target))
+      return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB;
     if (IsFunction)
       return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
     return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB;
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index 40e1928197bc..1691808d05a0 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -17,8 +17,8 @@
 ///
 ////===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyUtilities.h"
 #include "llvm/ADT/PriorityQueue.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index bd11d1b46906..21e0f6b23777 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -18,8 +18,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index bc6360aafd61..b2330a232093 100644
--- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -22,8 +22,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 53698ff09b10..09338a4898e0 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -16,8 +16,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyTargetMachine.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 2bbf7a2b42f9..41f315c2825b 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -26,8 +26,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/ADT/PriorityQueue.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 257f1d110aa2..4f3ae57733e5 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -12,8 +12,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/Function.h" // To access function attributes.
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 744a3ed427af..576b71dd7966 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -15,8 +15,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 473dcb7a33fd..1462c49aa9fd 100644
--- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -19,8 +19,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index e3470825940c..766ab456a8e6 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -13,8 +13,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 57d454746b06..6650191807dc 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -20,8 +20,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyUtilities.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index 9e944df637d9..878ffd08d228 100644
--- a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -19,8 +19,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index 2441ead7cb27..b1385f409fd3 100644
--- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -12,8 +12,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
index a9aa781610ce..8173364fa880 100644
--- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -24,8 +24,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index d9b2b8743649..7b05f671bdcb 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -12,9 +12,9 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "WebAssembly.h"
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "WebAssemblyTargetMachine.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
 #include "WebAssemblyTargetObjectFile.h"
 #include "WebAssemblyTargetTransformInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 788fac62626b..f7e31de65f6d 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86AsmInstrumentation.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86Operand.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 33eff14b8215..0fba15cc692c 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -15,8 +15,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SMLoc.h"
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 7471373334f6..fc4adddc149b 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -11,7 +11,6 @@ tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
 tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
-tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables)
 if(LLVM_BUILD_GLOBAL_ISEL)
   tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
   tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 36ad23bb41c0..4ce908b1da64 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -74,8 +74,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86DisassemblerDecoder.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86DisassemblerDecoder.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index b7f637e9a8cd..577b7a776c6d 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -13,10 +13,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <cstdarg>   /* for va_*()       */
-#include <cstdio>    /* for vsnprintf()  */
-#include <cstdlib>   /* for exit()       */
-#include <cstring>   /* for memset()     */
+#include <cstdarg> /* for va_*()       */
+#include <cstdio>  /* for vsnprintf()  */
+#include <cstdlib> /* for exit()       */
+#include <cstring> /* for memset()     */
 
 #include "X86DisassemblerDecoder.h"
 
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 6aa700306744..4d91300c7ede 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/X86BaseInfo.h"
 #include "X86ATTInstPrinter.h"
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "X86InstComments.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index b5a926f915af..5e809c34325e 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -15,8 +15,8 @@
 #include "X86InstComments.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "Utils/X86ShuffleDecode.h"
-#include "llvm/MC/MCInst.h"
 #include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index a8c631ae282f..d6af6712d5a1 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "X86IntelInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "X86InstComments.h"
-#include "X86IntelInstPrinter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index a713af6aadb5..7a9e4f4468ec 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -10,6 +10,8 @@
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
@@ -22,9 +24,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 0b73df3a2ff8..4da4eebec038 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -9,13 +9,13 @@
 
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstdint>
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 9c35a251e480..1538a515f419 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -13,12 +13,12 @@
 
 #include "X86MCAsmInfo.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ELF.h"
 using namespace llvm;
 
 enum AsmWriterFlavorTy {
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 297926ddcfda..4097ef224d50 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -7,9 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
@@ -19,7 +20,6 @@
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/MachO.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index d6777fc8aa6a..105580c913a1 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -9,11 +9,11 @@
 
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index e1825ca1eda1..dc15aeadaa61 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -17,6 +17,7 @@
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineValueType.h"
@@ -34,7 +35,6 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3cfb924abd01..621505aaded9 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -414,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
     else
@@ -424,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
     else
@@ -437,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
     else
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 328a80304602..2777fa89330f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -29,8 +29,8 @@
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
 
 using namespace llvm;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5303d7a406ad..831e9bdab0e1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1,4 +1,4 @@
-
+﻿
 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -81,6 +81,12 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
              " of the loop header PC will be 0)."),
     cl::Hidden);
 
+static cl::opt<bool> MulConstantOptimization(
+    "mul-constant-optimization", cl::init(true),
+    cl::desc("Replace 'mul x, Const' with more effective instructions like "
+             "SHIFT, LEA, etc."),
+    cl::Hidden);
+
 /// Call this when the user attempts to do something unsupported, like
 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
 /// report_fatal_error, so calling code should attempt to recover without
@@ -5810,7 +5816,8 @@ static bool setTargetShuffleZeroElements(SDValue N,
 // The decoded shuffle mask may contain a different number of elements to the
 // destination value type.
 static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
-                               SmallVectorImpl<SDValue> &Ops) {
+                               SmallVectorImpl<SDValue> &Ops,
+                               SelectionDAG &DAG) {
   Mask.clear();
   Ops.clear();
 
@@ -5868,8 +5875,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
       assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
     }
 
-    if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
-        NumElts <= SrcExtract.getConstantOperandVal(1))
+    if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
       return false;
 
     SDValue SrcVec = SrcExtract.getOperand(0);
@@ -5877,8 +5883,12 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     unsigned NumSrcElts = SrcVT.getVectorNumElements();
     unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
 
+    unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
+    if (NumSrcElts <= SrcIdx)
+      return false;
+
     Ops.push_back(SrcVec);
-    Mask.push_back(SrcExtract.getConstantOperandVal(1));
+    Mask.push_back(SrcIdx);
     Mask.append(NumZeros, SM_SentinelZero);
     Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
     return true;
@@ -5915,6 +5925,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
       Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
     return true;
   }
+  case X86ISD::PACKSS: {
+    // If we know input saturation won't happen we can treat this
+    // as a truncation shuffle.
+    if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
+        DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
+      return false;
+
+    Ops.push_back(N.getOperand(0));
+    Ops.push_back(N.getOperand(1));
+    for (unsigned i = 0; i != NumElts; ++i)
+      Mask.push_back(i * 2);
+    return true;
+  }
   case X86ISD::VSHLI:
   case X86ISD::VSRLI: {
     uint64_t ShiftVal = N.getConstantOperandVal(1);
@@ -5989,9 +6012,10 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
 /// Returns true if the target shuffle mask was decoded.
 static bool resolveTargetShuffleInputs(SDValue Op,
                                        SmallVectorImpl<SDValue> &Inputs,
-                                       SmallVectorImpl<int> &Mask) {
+                                       SmallVectorImpl<int> &Mask,
+                                       SelectionDAG &DAG) {
   if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
-    if (!getFauxShuffleMask(Op, Mask, Inputs))
+    if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
       return false;
 
   resolveTargetShuffleInputsAndMask(Inputs, Mask);
@@ -6391,6 +6415,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
 /// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
 static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
                                         const SDLoc &DL, SelectionDAG &DAG,
+                                        const X86Subtarget &Subtarget,
                                         bool isAfterLegalize) {
   unsigned NumElems = Elts.size();
 
@@ -6495,6 +6520,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
       return SDValue();
 
+    // Don't create 256-bit non-temporal aligned loads without AVX2 as these
+    // will lower to regular temporal loads and use the cache.
+    if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
+        VT.is256BitVector() && !Subtarget.hasInt256())
+      return SDValue();
+
     if (IsConsecutiveLoad)
       return CreateLoad(VT, LDBase);
 
@@ -7701,7 +7732,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   // See if we can use a vector load to get all of the elements.
   if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
     SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
-    if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false))
+    if (SDValue LD =
+            EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
       return LD;
   }
 
@@ -7825,24 +7857,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     }
 
     // Next, we iteratively mix elements, e.g. for v4f32:
-    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
-    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
-    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
-    unsigned EltStride = NumElems >> 1;
-    while (EltStride != 0) {
-      for (unsigned i = 0; i < EltStride; ++i) {
-        // If Ops[i+EltStride] is undef and this is the first round of mixing,
-        // then it is safe to just drop this shuffle: V[i] is already in the
-        // right place, the one element (since it's the first round) being
-        // inserted as undef can be dropped.  This isn't safe for successive
-        // rounds because they will permute elements within both vectors.
-        if (Ops[i+EltStride].isUndef() &&
-            EltStride == NumElems/2)
-          continue;
+    //   Step 1: unpcklps 0, 1 ==> X: <?, ?, 1, 0>
+    //         : unpcklps 2, 3 ==> Y: <?, ?, 3, 2>
+    //   Step 2: unpcklpd X, Y ==>    <3, 2, 1, 0>
+    for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
+      // Generate scaled UNPCKL shuffle mask.
+      SmallVector<int, 16> Mask;
+      for(unsigned i = 0; i != Scale; ++i)
+        Mask.push_back(i);
+      for (unsigned i = 0; i != Scale; ++i)
+        Mask.push_back(NumElems+i);
+      Mask.append(NumElems - Mask.size(), SM_SentinelUndef);
 
-        Ops[i] = getUnpackl(DAG, dl, VT, Ops[i], Ops[i + EltStride]);
-      }
-      EltStride >>= 1;
+      for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
+        Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
     }
     return Ops[0];
   }
@@ -17177,7 +17205,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
               Cond == ISD::SETGE || Cond == ISD::SETUGE;
   bool Invert = Cond == ISD::SETNE ||
                 (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
-  bool FlipSigns = ISD::isUnsignedIntSetCC(Cond);
+
+  // If both operands are known non-negative, then an unsigned compare is the
+  // same as a signed compare and there's no need to flip signbits.
+  // TODO: We could check for more general simplifications here since we're
+  // computing known bits.
+  bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
+                   !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));
 
   // Special case: Use min/max operations for SETULE/SETUGE
   MVT VET = VT.getVectorElementType();
@@ -26741,6 +26775,17 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
     return Tmp;
   }
 
+  case X86ISD::VSHLI: {
+    SDValue Src = Op.getOperand(0);
+    unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+    APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    if (ShiftVal.uge(VTBits))
+      return VTBits; // Shifted all bits out --> zero.
+    if (ShiftVal.uge(Tmp))
+      return 1; // Shifted all sign bits out --> unknown.
+    return Tmp - ShiftVal.getZExtValue();
+  }
+
   case X86ISD::VSRAI: {
     SDValue Src = Op.getOperand(0);
     unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
@@ -27889,7 +27934,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
   // Extract target shuffle mask and resolve sentinels and inputs.
   SmallVector<int, 64> OpMask;
   SmallVector<SDValue, 2> OpInputs;
-  if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
+  if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
     return false;
 
   assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
@@ -28788,7 +28833,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
   }
 
   if (Elts.size() == VT.getVectorNumElements())
-    if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
+    if (SDValue LD =
+            EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))
       return LD;
 
   // For AVX2, we sometimes want to combine
@@ -29430,7 +29476,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
   // Resolve the target shuffle inputs and mask.
   SmallVector<int, 16> Mask;
   SmallVector<SDValue, 2> Ops;
-  if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask))
+  if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
     return SDValue();
 
   // Attempt to narrow/widen the shuffle mask to the correct size.
@@ -31017,6 +31063,77 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   }
 }
 
+static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
+                                 EVT VT, SDLoc DL) {
+  // Expand a multiply by selected constants into a short chain of DAG nodes.
+  auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(Mult, DL, VT));
+    Result = DAG.getNode(ISD::SHL, DL, VT, Result,
+                         DAG.getConstant(Shift, DL, MVT::i8));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+                         N->getOperand(0));
+    return Result;
+  };
+
+  auto combineMulMulAddOrSub = [&](bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(9, DL, VT));
+    Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
+                         N->getOperand(0));
+    return Result;
+  };
+
+  switch (MulAmt) {
+  default:
+    break;
+  case 11:
+    // mul x, 11 => add ((shl (mul x, 5), 1), x)
+    return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
+  case 21:
+    // mul x, 21 => add ((shl (mul x, 5), 2), x)
+    return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
+  case 22:
+    // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
+  case 19:
+    // mul x, 19 => sub ((shl (mul x, 5), 2), x)
+    return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
+  case 13:
+    // mul x, 13 => add ((shl (mul x, 3), 2), x)
+    return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
+  case 23:
+    // mul x, 23 => sub ((shl (mul x, 3), 3), x)
+    return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
+  case 14:
+    // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
+  case 26:
+    // mul x, 26 => sub ((mul (mul x, 9), 3), x)
+    return combineMulMulAddOrSub(/*isAdd*/ false);
+  case 28:
+    // mul x, 28 => add ((mul (mul x, 9), 3), x)
+    return combineMulMulAddOrSub(/*isAdd*/ true);
+  case 29:
+    // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulMulAddOrSub(/*isAdd*/ true));
+  case 30:
+    // mul x, 30 => sub (sub ((shl x, 5), x), x)
+    return DAG.getNode(
+        ISD::SUB, DL, VT,
+        DAG.getNode(ISD::SUB, DL, VT,
+                    DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                DAG.getConstant(5, DL, MVT::i8)),
+                    N->getOperand(0)),
+        N->getOperand(0));
+  }
+  return SDValue();
+}
+
 /// Optimize a single multiply with constant into two operations in order to
 /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
 static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@@ -31026,6 +31143,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   if (DCI.isBeforeLegalize() && VT.isVector())
     return reduceVMULWidth(N, DAG, Subtarget);
 
+  if (!MulConstantOptimization)
+    return SDValue();
   // An imul is usually smaller than the alternative sequence.
   if (DAG.getMachineFunction().getFunction()->optForMinSize())
     return SDValue();
@@ -31081,7 +31200,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
     else
       NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                            DAG.getConstant(MulAmt2, DL, VT));
-  }
+  } else if (!Subtarget.slowLEA())
+    NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
 
   if (!NewMul) {
     assert(MulAmt != 0 &&
@@ -32381,15 +32501,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // For chips with slow 32-byte unaligned loads, break the 32-byte operation
-  // into two 16-byte operations.
+  // into two 16-byte operations. Also split non-temporal aligned loads on
+  // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
   ISD::LoadExtType Ext = Ld->getExtensionType();
   bool Fast;
   unsigned AddressSpace = Ld->getAddressSpace();
   unsigned Alignment = Ld->getAlignment();
   if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
       Ext == ISD::NON_EXTLOAD &&
-      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
-                             AddressSpace, Alignment, &Fast) && !Fast) {
+      ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) ||
+       (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
+                               AddressSpace, Alignment, &Fast) && !Fast))) {
     unsigned NumElems = RegVT.getVectorNumElements();
     if (NumElems < 2)
       return SDValue();
@@ -35097,7 +35219,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
         if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                     OpVT, AS, Alignment, &Fast) && Fast) {
           SDValue Ops[] = {SubVec2, SubVec};
-          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+          if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG,
+                                                    Subtarget, false))
             return Ld;
         }
       }
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index e2e228f5544b..5224a16613cb 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -641,22 +641,37 @@ def sdmem : Operand<v2f64> {
 // SSE pattern fragments
 //===----------------------------------------------------------------------===//
 
+// Vector load wrappers to prevent folding of non-temporal aligned loads on 
+// supporting targets.
+def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !Subtarget->hasSSE41() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+         cast<LoadSDNode>(N)->getAlignment() < 16;
+}]>;
+def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !Subtarget->hasAVX2() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+         cast<LoadSDNode>(N)->getAlignment() < 32;
+}]>;
+def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return !Subtarget->hasAVX512() || !cast<LoadSDNode>(N)->isNonTemporal() ||
+         cast<LoadSDNode>(N)->getAlignment() < 64;
+}]>;
+
 // 128-bit load pattern fragments
 // NOTE: all 128-bit integer vector loads are promoted to v2i64
-def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
-def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv2i64    : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+def loadv4f32    : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>;
+def loadv2f64    : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>;
+def loadv2i64    : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>;
 
 // 256-bit load pattern fragments
 // NOTE: all 256-bit integer vector loads are promoted to v4i64
-def loadv8f32    : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
-def loadv4f64    : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
-def loadv4i64    : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+def loadv8f32    : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>;
+def loadv4f64    : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>;
+def loadv4i64    : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>;
 
 // 512-bit load pattern fragments
-def loadv16f32   : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
-def loadv8f64    : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
-def loadv8i64    : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+def loadv16f32   : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>;
+def loadv8f64    : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>;
+def loadv8i64    : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>;
 
 // 128-/256-/512-bit extload pattern fragments
 def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
@@ -728,9 +743,13 @@ def alignedloadv8i64  : PatFrag<(ops node:$ptr),
 // allows unaligned accesses, match any load, though this may require
 // setting a feature bit in the processor (on startup, for example).
 // Opteron 10h and later implement such a feature.
+// Avoid non-temporal aligned loads on supported targets.
 def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return    Subtarget->hasSSEUnalignedMem()
-         || cast<LoadSDNode>(N)->getAlignment() >= 16;
+  return (Subtarget->hasSSEUnalignedMem() ||
+          cast<LoadSDNode>(N)->getAlignment() >= 16) &&
+         (!Subtarget->hasSSE41() ||
+          !(cast<LoadSDNode>(N)->getAlignment() >= 16 &&
+            cast<LoadSDNode>(N)->isNonTemporal()));
 }]>;
 
 // 128-bit memop pattern fragments
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0aee30081a35..ff5d90c4e78b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -121,8 +121,172 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
                       (STI.is64Bit() ? X86::RETQ : X86::RETL)),
       Subtarget(STI), RI(STI.getTargetTriple()) {
 
-// Generated memory folding tables.
-#include "X86GenFoldTables.inc"
+  static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
+    { X86::ADC32ri,     X86::ADC32mi,    0 },
+    { X86::ADC32ri8,    X86::ADC32mi8,   0 },
+    { X86::ADC32rr,     X86::ADC32mr,    0 },
+    { X86::ADC64ri32,   X86::ADC64mi32,  0 },
+    { X86::ADC64ri8,    X86::ADC64mi8,   0 },
+    { X86::ADC64rr,     X86::ADC64mr,    0 },
+    { X86::ADD16ri,     X86::ADD16mi,    0 },
+    { X86::ADD16ri8,    X86::ADD16mi8,   0 },
+    { X86::ADD16ri_DB,  X86::ADD16mi,    TB_NO_REVERSE },
+    { X86::ADD16ri8_DB, X86::ADD16mi8,   TB_NO_REVERSE },
+    { X86::ADD16rr,     X86::ADD16mr,    0 },
+    { X86::ADD16rr_DB,  X86::ADD16mr,    TB_NO_REVERSE },
+    { X86::ADD32ri,     X86::ADD32mi,    0 },
+    { X86::ADD32ri8,    X86::ADD32mi8,   0 },
+    { X86::ADD32ri_DB,  X86::ADD32mi,    TB_NO_REVERSE },
+    { X86::ADD32ri8_DB, X86::ADD32mi8,   TB_NO_REVERSE },
+    { X86::ADD32rr,     X86::ADD32mr,    0 },
+    { X86::ADD32rr_DB,  X86::ADD32mr,    TB_NO_REVERSE },
+    { X86::ADD64ri32,   X86::ADD64mi32,  0 },
+    { X86::ADD64ri8,    X86::ADD64mi8,   0 },
+    { X86::ADD64ri32_DB,X86::ADD64mi32,  TB_NO_REVERSE },
+    { X86::ADD64ri8_DB, X86::ADD64mi8,   TB_NO_REVERSE },
+    { X86::ADD64rr,     X86::ADD64mr,    0 },
+    { X86::ADD64rr_DB,  X86::ADD64mr,    TB_NO_REVERSE },
+    { X86::ADD8ri,      X86::ADD8mi,     0 },
+    { X86::ADD8rr,      X86::ADD8mr,     0 },
+    { X86::AND16ri,     X86::AND16mi,    0 },
+    { X86::AND16ri8,    X86::AND16mi8,   0 },
+    { X86::AND16rr,     X86::AND16mr,    0 },
+    { X86::AND32ri,     X86::AND32mi,    0 },
+    { X86::AND32ri8,    X86::AND32mi8,   0 },
+    { X86::AND32rr,     X86::AND32mr,    0 },
+    { X86::AND64ri32,   X86::AND64mi32,  0 },
+    { X86::AND64ri8,    X86::AND64mi8,   0 },
+    { X86::AND64rr,     X86::AND64mr,    0 },
+    { X86::AND8ri,      X86::AND8mi,     0 },
+    { X86::AND8rr,      X86::AND8mr,     0 },
+    { X86::DEC16r,      X86::DEC16m,     0 },
+    { X86::DEC32r,      X86::DEC32m,     0 },
+    { X86::DEC64r,      X86::DEC64m,     0 },
+    { X86::DEC8r,       X86::DEC8m,      0 },
+    { X86::INC16r,      X86::INC16m,     0 },
+    { X86::INC32r,      X86::INC32m,     0 },
+    { X86::INC64r,      X86::INC64m,     0 },
+    { X86::INC8r,       X86::INC8m,      0 },
+    { X86::NEG16r,      X86::NEG16m,     0 },
+    { X86::NEG32r,      X86::NEG32m,     0 },
+    { X86::NEG64r,      X86::NEG64m,     0 },
+    { X86::NEG8r,       X86::NEG8m,      0 },
+    { X86::NOT16r,      X86::NOT16m,     0 },
+    { X86::NOT32r,      X86::NOT32m,     0 },
+    { X86::NOT64r,      X86::NOT64m,     0 },
+    { X86::NOT8r,       X86::NOT8m,      0 },
+    { X86::OR16ri,      X86::OR16mi,     0 },
+    { X86::OR16ri8,     X86::OR16mi8,    0 },
+    { X86::OR16rr,      X86::OR16mr,     0 },
+    { X86::OR32ri,      X86::OR32mi,     0 },
+    { X86::OR32ri8,     X86::OR32mi8,    0 },
+    { X86::OR32rr,      X86::OR32mr,     0 },
+    { X86::OR64ri32,    X86::OR64mi32,   0 },
+    { X86::OR64ri8,     X86::OR64mi8,    0 },
+    { X86::OR64rr,      X86::OR64mr,     0 },
+    { X86::OR8ri,       X86::OR8mi,      0 },
+    { X86::OR8rr,       X86::OR8mr,      0 },
+    { X86::ROL16r1,     X86::ROL16m1,    0 },
+    { X86::ROL16rCL,    X86::ROL16mCL,   0 },
+    { X86::ROL16ri,     X86::ROL16mi,    0 },
+    { X86::ROL32r1,     X86::ROL32m1,    0 },
+    { X86::ROL32rCL,    X86::ROL32mCL,   0 },
+    { X86::ROL32ri,     X86::ROL32mi,    0 },
+    { X86::ROL64r1,     X86::ROL64m1,    0 },
+    { X86::ROL64rCL,    X86::ROL64mCL,   0 },
+    { X86::ROL64ri,     X86::ROL64mi,    0 },
+    { X86::ROL8r1,      X86::ROL8m1,     0 },
+    { X86::ROL8rCL,     X86::ROL8mCL,    0 },
+    { X86::ROL8ri,      X86::ROL8mi,     0 },
+    { X86::ROR16r1,     X86::ROR16m1,    0 },
+    { X86::ROR16rCL,    X86::ROR16mCL,   0 },
+    { X86::ROR16ri,     X86::ROR16mi,    0 },
+    { X86::ROR32r1,     X86::ROR32m1,    0 },
+    { X86::ROR32rCL,    X86::ROR32mCL,   0 },
+    { X86::ROR32ri,     X86::ROR32mi,    0 },
+    { X86::ROR64r1,     X86::ROR64m1,    0 },
+    { X86::ROR64rCL,    X86::ROR64mCL,   0 },
+    { X86::ROR64ri,     X86::ROR64mi,    0 },
+    { X86::ROR8r1,      X86::ROR8m1,     0 },
+    { X86::ROR8rCL,     X86::ROR8mCL,    0 },
+    { X86::ROR8ri,      X86::ROR8mi,     0 },
+    { X86::SAR16r1,     X86::SAR16m1,    0 },
+    { X86::SAR16rCL,    X86::SAR16mCL,   0 },
+    { X86::SAR16ri,     X86::SAR16mi,    0 },
+    { X86::SAR32r1,     X86::SAR32m1,    0 },
+    { X86::SAR32rCL,    X86::SAR32mCL,   0 },
+    { X86::SAR32ri,     X86::SAR32mi,    0 },
+    { X86::SAR64r1,     X86::SAR64m1,    0 },
+    { X86::SAR64rCL,    X86::SAR64mCL,   0 },
+    { X86::SAR64ri,     X86::SAR64mi,    0 },
+    { X86::SAR8r1,      X86::SAR8m1,     0 },
+    { X86::SAR8rCL,     X86::SAR8mCL,    0 },
+    { X86::SAR8ri,      X86::SAR8mi,     0 },
+    { X86::SBB32ri,     X86::SBB32mi,    0 },
+    { X86::SBB32ri8,    X86::SBB32mi8,   0 },
+    { X86::SBB32rr,     X86::SBB32mr,    0 },
+    { X86::SBB64ri32,   X86::SBB64mi32,  0 },
+    { X86::SBB64ri8,    X86::SBB64mi8,   0 },
+    { X86::SBB64rr,     X86::SBB64mr,    0 },
+    { X86::SHL16r1,     X86::SHL16m1,    0 },
+    { X86::SHL16rCL,    X86::SHL16mCL,   0 },
+    { X86::SHL16ri,     X86::SHL16mi,    0 },
+    { X86::SHL32r1,     X86::SHL32m1,    0 },
+    { X86::SHL32rCL,    X86::SHL32mCL,   0 },
+    { X86::SHL32ri,     X86::SHL32mi,    0 },
+    { X86::SHL64r1,     X86::SHL64m1,    0 },
+    { X86::SHL64rCL,    X86::SHL64mCL,   0 },
+    { X86::SHL64ri,     X86::SHL64mi,    0 },
+    { X86::SHL8r1,      X86::SHL8m1,     0 },
+    { X86::SHL8rCL,     X86::SHL8mCL,    0 },
+    { X86::SHL8ri,      X86::SHL8mi,     0 },
+    { X86::SHLD16rrCL,  X86::SHLD16mrCL, 0 },
+    { X86::SHLD16rri8,  X86::SHLD16mri8, 0 },
+    { X86::SHLD32rrCL,  X86::SHLD32mrCL, 0 },
+    { X86::SHLD32rri8,  X86::SHLD32mri8, 0 },
+    { X86::SHLD64rrCL,  X86::SHLD64mrCL, 0 },
+    { X86::SHLD64rri8,  X86::SHLD64mri8, 0 },
+    { X86::SHR16r1,     X86::SHR16m1,    0 },
+    { X86::SHR16rCL,    X86::SHR16mCL,   0 },
+    { X86::SHR16ri,     X86::SHR16mi,    0 },
+    { X86::SHR32r1,     X86::SHR32m1,    0 },
+    { X86::SHR32rCL,    X86::SHR32mCL,   0 },
+    { X86::SHR32ri,     X86::SHR32mi,    0 },
+    { X86::SHR64r1,     X86::SHR64m1,    0 },
+    { X86::SHR64rCL,    X86::SHR64mCL,   0 },
+    { X86::SHR64ri,     X86::SHR64mi,    0 },
+    { X86::SHR8r1,      X86::SHR8m1,     0 },
+    { X86::SHR8rCL,     X86::SHR8mCL,    0 },
+    { X86::SHR8ri,      X86::SHR8mi,     0 },
+    { X86::SHRD16rrCL,  X86::SHRD16mrCL, 0 },
+    { X86::SHRD16rri8,  X86::SHRD16mri8, 0 },
+    { X86::SHRD32rrCL,  X86::SHRD32mrCL, 0 },
+    { X86::SHRD32rri8,  X86::SHRD32mri8, 0 },
+    { X86::SHRD64rrCL,  X86::SHRD64mrCL, 0 },
+    { X86::SHRD64rri8,  X86::SHRD64mri8, 0 },
+    { X86::SUB16ri,     X86::SUB16mi,    0 },
+    { X86::SUB16ri8,    X86::SUB16mi8,   0 },
+    { X86::SUB16rr,     X86::SUB16mr,    0 },
+    { X86::SUB32ri,     X86::SUB32mi,    0 },
+    { X86::SUB32ri8,    X86::SUB32mi8,   0 },
+    { X86::SUB32rr,     X86::SUB32mr,    0 },
+    { X86::SUB64ri32,   X86::SUB64mi32,  0 },
+    { X86::SUB64ri8,    X86::SUB64mi8,   0 },
+    { X86::SUB64rr,     X86::SUB64mr,    0 },
+    { X86::SUB8ri,      X86::SUB8mi,     0 },
+    { X86::SUB8rr,      X86::SUB8mr,     0 },
+    { X86::XOR16ri,     X86::XOR16mi,    0 },
+    { X86::XOR16ri8,    X86::XOR16mi8,   0 },
+    { X86::XOR16rr,     X86::XOR16mr,    0 },
+    { X86::XOR32ri,     X86::XOR32mi,    0 },
+    { X86::XOR32ri8,    X86::XOR32mi8,   0 },
+    { X86::XOR32rr,     X86::XOR32mr,    0 },
+    { X86::XOR64ri32,   X86::XOR64mi32,  0 },
+    { X86::XOR64ri8,    X86::XOR64mi8,   0 },
+    { X86::XOR64rr,     X86::XOR64mr,    0 },
+    { X86::XOR8ri,      X86::XOR8mi,     0 },
+    { X86::XOR8rr,      X86::XOR8mr,     0 }
+  };
 
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) {
     AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
@@ -131,11 +295,746 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
                   Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
   }
 
+  static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
+    { X86::BT16ri8,     X86::BT16mi8,       TB_FOLDED_LOAD },
+    { X86::BT32ri8,     X86::BT32mi8,       TB_FOLDED_LOAD },
+    { X86::BT64ri8,     X86::BT64mi8,       TB_FOLDED_LOAD },
+    { X86::CALL32r,     X86::CALL32m,       TB_FOLDED_LOAD },
+    { X86::CALL64r,     X86::CALL64m,       TB_FOLDED_LOAD },
+    { X86::CMP16ri,     X86::CMP16mi,       TB_FOLDED_LOAD },
+    { X86::CMP16ri8,    X86::CMP16mi8,      TB_FOLDED_LOAD },
+    { X86::CMP16rr,     X86::CMP16mr,       TB_FOLDED_LOAD },
+    { X86::CMP32ri,     X86::CMP32mi,       TB_FOLDED_LOAD },
+    { X86::CMP32ri8,    X86::CMP32mi8,      TB_FOLDED_LOAD },
+    { X86::CMP32rr,     X86::CMP32mr,       TB_FOLDED_LOAD },
+    { X86::CMP64ri32,   X86::CMP64mi32,     TB_FOLDED_LOAD },
+    { X86::CMP64ri8,    X86::CMP64mi8,      TB_FOLDED_LOAD },
+    { X86::CMP64rr,     X86::CMP64mr,       TB_FOLDED_LOAD },
+    { X86::CMP8ri,      X86::CMP8mi,        TB_FOLDED_LOAD },
+    { X86::CMP8rr,      X86::CMP8mr,        TB_FOLDED_LOAD },
+    { X86::DIV16r,      X86::DIV16m,        TB_FOLDED_LOAD },
+    { X86::DIV32r,      X86::DIV32m,        TB_FOLDED_LOAD },
+    { X86::DIV64r,      X86::DIV64m,        TB_FOLDED_LOAD },
+    { X86::DIV8r,       X86::DIV8m,         TB_FOLDED_LOAD },
+    { X86::EXTRACTPSrr, X86::EXTRACTPSmr,   TB_FOLDED_STORE },
+    { X86::IDIV16r,     X86::IDIV16m,       TB_FOLDED_LOAD },
+    { X86::IDIV32r,     X86::IDIV32m,       TB_FOLDED_LOAD },
+    { X86::IDIV64r,     X86::IDIV64m,       TB_FOLDED_LOAD },
+    { X86::IDIV8r,      X86::IDIV8m,        TB_FOLDED_LOAD },
+    { X86::IMUL16r,     X86::IMUL16m,       TB_FOLDED_LOAD },
+    { X86::IMUL32r,     X86::IMUL32m,       TB_FOLDED_LOAD },
+    { X86::IMUL64r,     X86::IMUL64m,       TB_FOLDED_LOAD },
+    { X86::IMUL8r,      X86::IMUL8m,        TB_FOLDED_LOAD },
+    { X86::JMP32r,      X86::JMP32m,        TB_FOLDED_LOAD },
+    { X86::JMP64r,      X86::JMP64m,        TB_FOLDED_LOAD },
+    { X86::MOV16ri,     X86::MOV16mi,       TB_FOLDED_STORE },
+    { X86::MOV16rr,     X86::MOV16mr,       TB_FOLDED_STORE },
+    { X86::MOV32ri,     X86::MOV32mi,       TB_FOLDED_STORE },
+    { X86::MOV32rr,     X86::MOV32mr,       TB_FOLDED_STORE },
+    { X86::MOV64ri32,   X86::MOV64mi32,     TB_FOLDED_STORE },
+    { X86::MOV64rr,     X86::MOV64mr,       TB_FOLDED_STORE },
+    { X86::MOV8ri,      X86::MOV8mi,        TB_FOLDED_STORE },
+    { X86::MOV8rr,      X86::MOV8mr,        TB_FOLDED_STORE },
+    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
+    { X86::MOVAPDrr,    X86::MOVAPDmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::MOVAPSrr,    X86::MOVAPSmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::MOVDQArr,    X86::MOVDQAmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::MOVDQUrr,    X86::MOVDQUmr,      TB_FOLDED_STORE },
+    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr,   TB_FOLDED_STORE },
+    { X86::MOVPQIto64rr,X86::MOVPQI2QImr,   TB_FOLDED_STORE },
+    { X86::MOVSDto64rr, X86::MOVSDto64mr,   TB_FOLDED_STORE },
+    { X86::MOVSS2DIrr,  X86::MOVSS2DImr,    TB_FOLDED_STORE },
+    { X86::MOVUPDrr,    X86::MOVUPDmr,      TB_FOLDED_STORE },
+    { X86::MOVUPSrr,    X86::MOVUPSmr,      TB_FOLDED_STORE },
+    { X86::MUL16r,      X86::MUL16m,        TB_FOLDED_LOAD },
+    { X86::MUL32r,      X86::MUL32m,        TB_FOLDED_LOAD },
+    { X86::MUL64r,      X86::MUL64m,        TB_FOLDED_LOAD },
+    { X86::MUL8r,       X86::MUL8m,         TB_FOLDED_LOAD },
+    { X86::PEXTRDrr,    X86::PEXTRDmr,      TB_FOLDED_STORE },
+    { X86::PEXTRQrr,    X86::PEXTRQmr,      TB_FOLDED_STORE },
+    { X86::PUSH16r,     X86::PUSH16rmm,     TB_FOLDED_LOAD },
+    { X86::PUSH32r,     X86::PUSH32rmm,     TB_FOLDED_LOAD },
+    { X86::PUSH64r,     X86::PUSH64rmm,     TB_FOLDED_LOAD },
+    { X86::SETAEr,      X86::SETAEm,        TB_FOLDED_STORE },
+    { X86::SETAr,       X86::SETAm,         TB_FOLDED_STORE },
+    { X86::SETBEr,      X86::SETBEm,        TB_FOLDED_STORE },
+    { X86::SETBr,       X86::SETBm,         TB_FOLDED_STORE },
+    { X86::SETEr,       X86::SETEm,         TB_FOLDED_STORE },
+    { X86::SETGEr,      X86::SETGEm,        TB_FOLDED_STORE },
+    { X86::SETGr,       X86::SETGm,         TB_FOLDED_STORE },
+    { X86::SETLEr,      X86::SETLEm,        TB_FOLDED_STORE },
+    { X86::SETLr,       X86::SETLm,         TB_FOLDED_STORE },
+    { X86::SETNEr,      X86::SETNEm,        TB_FOLDED_STORE },
+    { X86::SETNOr,      X86::SETNOm,        TB_FOLDED_STORE },
+    { X86::SETNPr,      X86::SETNPm,        TB_FOLDED_STORE },
+    { X86::SETNSr,      X86::SETNSm,        TB_FOLDED_STORE },
+    { X86::SETOr,       X86::SETOm,         TB_FOLDED_STORE },
+    { X86::SETPr,       X86::SETPm,         TB_FOLDED_STORE },
+    { X86::SETSr,       X86::SETSm,         TB_FOLDED_STORE },
+    { X86::TAILJMPr,    X86::TAILJMPm,      TB_FOLDED_LOAD },
+    { X86::TAILJMPr64,  X86::TAILJMPm64,    TB_FOLDED_LOAD },
+    { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
+    { X86::TEST16ri,    X86::TEST16mi,      TB_FOLDED_LOAD },
+    { X86::TEST32ri,    X86::TEST32mi,      TB_FOLDED_LOAD },
+    { X86::TEST64ri32,  X86::TEST64mi32,    TB_FOLDED_LOAD },
+    { X86::TEST8ri,     X86::TEST8mi,       TB_FOLDED_LOAD },
+
+    // AVX 128-bit versions of foldable instructions
+    { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr,  TB_FOLDED_STORE  },
+    { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVAPDrr,   X86::VMOVAPDmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVAPSrr,   X86::VMOVAPSmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVDQArr,   X86::VMOVDQAmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVDQUrr,   X86::VMOVDQUmr,     TB_FOLDED_STORE },
+    { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr,  TB_FOLDED_STORE },
+    { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
+    { X86::VMOVSDto64rr,X86::VMOVSDto64mr,  TB_FOLDED_STORE },
+    { X86::VMOVSS2DIrr, X86::VMOVSS2DImr,   TB_FOLDED_STORE },
+    { X86::VMOVUPDrr,   X86::VMOVUPDmr,     TB_FOLDED_STORE },
+    { X86::VMOVUPSrr,   X86::VMOVUPSmr,     TB_FOLDED_STORE },
+    { X86::VPEXTRDrr,   X86::VPEXTRDmr,     TB_FOLDED_STORE },
+    { X86::VPEXTRQrr,   X86::VPEXTRQmr,     TB_FOLDED_STORE },
+
+    // AVX 256-bit foldable instructions
+    { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVAPDYrr,  X86::VMOVAPDYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVAPSYrr,  X86::VMOVAPSYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVDQAYrr,  X86::VMOVDQAYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVDQUYrr,  X86::VMOVDQUYmr,    TB_FOLDED_STORE },
+    { X86::VMOVUPDYrr,  X86::VMOVUPDYmr,    TB_FOLDED_STORE },
+    { X86::VMOVUPSYrr,  X86::VMOVUPSYmr,    TB_FOLDED_STORE },
+
+    // AVX-512 foldable instructions
+    { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
+    { X86::VEXTRACTPSZrr,   X86::VEXTRACTPSZmr,    TB_FOLDED_STORE },
+    { X86::VMOVAPDZrr,      X86::VMOVAPDZmr,    TB_FOLDED_STORE | TB_ALIGN_64 },
+    { X86::VMOVAPSZrr,      X86::VMOVAPSZmr,    TB_FOLDED_STORE | TB_ALIGN_64 },
+    { X86::VMOVDQA32Zrr,    X86::VMOVDQA32Zmr,  TB_FOLDED_STORE | TB_ALIGN_64 },
+    { X86::VMOVDQA64Zrr,    X86::VMOVDQA64Zmr,  TB_FOLDED_STORE | TB_ALIGN_64 },
+    { X86::VMOVDQU8Zrr,     X86::VMOVDQU8Zmr,   TB_FOLDED_STORE },
+    { X86::VMOVDQU16Zrr,    X86::VMOVDQU16Zmr,  TB_FOLDED_STORE },
+    { X86::VMOVDQU32Zrr,    X86::VMOVDQU32Zmr,  TB_FOLDED_STORE },
+    { X86::VMOVDQU64Zrr,    X86::VMOVDQU64Zmr,  TB_FOLDED_STORE },
+    { X86::VMOVPDI2DIZrr,   X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
+    { X86::VMOVPQIto64Zrr,  X86::VMOVPQI2QIZmr, TB_FOLDED_STORE },
+    { X86::VMOVSDto64Zrr,   X86::VMOVSDto64Zmr, TB_FOLDED_STORE },
+    { X86::VMOVSS2DIZrr,    X86::VMOVSS2DIZmr,  TB_FOLDED_STORE },
+    { X86::VMOVUPDZrr,      X86::VMOVUPDZmr,    TB_FOLDED_STORE },
+    { X86::VMOVUPSZrr,      X86::VMOVUPSZmr,    TB_FOLDED_STORE },
+    { X86::VPEXTRDZrr,      X86::VPEXTRDZmr,    TB_FOLDED_STORE },
+    { X86::VPEXTRQZrr,      X86::VPEXTRQZmr,    TB_FOLDED_STORE },
+    { X86::VPMOVDBZrr,      X86::VPMOVDBZmr,    TB_FOLDED_STORE },
+    { X86::VPMOVDWZrr,      X86::VPMOVDWZmr,    TB_FOLDED_STORE },
+    { X86::VPMOVQDZrr,      X86::VPMOVQDZmr,    TB_FOLDED_STORE },
+    { X86::VPMOVQWZrr,      X86::VPMOVQWZmr,    TB_FOLDED_STORE },
+    { X86::VPMOVWBZrr,      X86::VPMOVWBZmr,    TB_FOLDED_STORE },
+    { X86::VPMOVSDBZrr,     X86::VPMOVSDBZmr,   TB_FOLDED_STORE },
+    { X86::VPMOVSDWZrr,     X86::VPMOVSDWZmr,   TB_FOLDED_STORE },
+    { X86::VPMOVSQDZrr,     X86::VPMOVSQDZmr,   TB_FOLDED_STORE },
+    { X86::VPMOVSQWZrr,     X86::VPMOVSQWZmr,   TB_FOLDED_STORE },
+    { X86::VPMOVSWBZrr,     X86::VPMOVSWBZmr,   TB_FOLDED_STORE },
+    { X86::VPMOVUSDBZrr,    X86::VPMOVUSDBZmr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSDWZrr,    X86::VPMOVUSDWZmr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSQDZrr,    X86::VPMOVUSQDZmr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSQWZrr,    X86::VPMOVUSQWZmr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSWBZrr,    X86::VPMOVUSWBZmr,  TB_FOLDED_STORE },
+
+    // AVX-512 foldable instructions (256-bit versions)
+    { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE },
+    { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE },
+    { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE },
+    { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE },
+    { X86::VMOVAPDZ256rr,      X86::VMOVAPDZ256mr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVAPSZ256rr,      X86::VMOVAPSZ256mr,    TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVDQA32Z256rr,    X86::VMOVDQA32Z256mr,  TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVDQA64Z256rr,    X86::VMOVDQA64Z256mr,  TB_FOLDED_STORE | TB_ALIGN_32 },
+    { X86::VMOVUPDZ256rr,      X86::VMOVUPDZ256mr,    TB_FOLDED_STORE },
+    { X86::VMOVUPSZ256rr,      X86::VMOVUPSZ256mr,    TB_FOLDED_STORE },
+    { X86::VMOVDQU8Z256rr,     X86::VMOVDQU8Z256mr,   TB_FOLDED_STORE },
+    { X86::VMOVDQU16Z256rr,    X86::VMOVDQU16Z256mr,  TB_FOLDED_STORE },
+    { X86::VMOVDQU32Z256rr,    X86::VMOVDQU32Z256mr,  TB_FOLDED_STORE },
+    { X86::VMOVDQU64Z256rr,    X86::VMOVDQU64Z256mr,  TB_FOLDED_STORE },
+    { X86::VPMOVDWZ256rr,      X86::VPMOVDWZ256mr,    TB_FOLDED_STORE },
+    { X86::VPMOVQDZ256rr,      X86::VPMOVQDZ256mr,    TB_FOLDED_STORE },
+    { X86::VPMOVWBZ256rr,      X86::VPMOVWBZ256mr,    TB_FOLDED_STORE },
+    { X86::VPMOVSDWZ256rr,     X86::VPMOVSDWZ256mr,   TB_FOLDED_STORE },
+    { X86::VPMOVSQDZ256rr,     X86::VPMOVSQDZ256mr,   TB_FOLDED_STORE },
+    { X86::VPMOVSWBZ256rr,     X86::VPMOVSWBZ256mr,   TB_FOLDED_STORE },
+    { X86::VPMOVUSDWZ256rr,    X86::VPMOVUSDWZ256mr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSQDZ256rr,    X86::VPMOVUSQDZ256mr,  TB_FOLDED_STORE },
+    { X86::VPMOVUSWBZ256rr,    X86::VPMOVUSWBZ256mr,  TB_FOLDED_STORE },
+
+    // AVX-512 foldable instructions (128-bit versions)
+    { X86::VMOVAPDZ128rr,      X86::VMOVAPDZ128mr,    TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVAPSZ128rr,      X86::VMOVAPSZ128mr,    TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVDQA32Z128rr,    X86::VMOVDQA32Z128mr,  TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVDQA64Z128rr,    X86::VMOVDQA64Z128mr,  TB_FOLDED_STORE | TB_ALIGN_16 },
+    { X86::VMOVUPDZ128rr,      X86::VMOVUPDZ128mr,    TB_FOLDED_STORE },
+    { X86::VMOVUPSZ128rr,      X86::VMOVUPSZ128mr,    TB_FOLDED_STORE },
+    { X86::VMOVDQU8Z128rr,     X86::VMOVDQU8Z128mr,   TB_FOLDED_STORE },
+    { X86::VMOVDQU16Z128rr,    X86::VMOVDQU16Z128mr,  TB_FOLDED_STORE },
+    { X86::VMOVDQU32Z128rr,    X86::VMOVDQU32Z128mr,  TB_FOLDED_STORE },
+    { X86::VMOVDQU64Z128rr,    X86::VMOVDQU64Z128mr,  TB_FOLDED_STORE },
+
+    // F16C foldable instructions
+    { X86::VCVTPS2PHrr,        X86::VCVTPS2PHmr,      TB_FOLDED_STORE },
+    { X86::VCVTPS2PHYrr,       X86::VCVTPS2PHYmr,     TB_FOLDED_STORE }
+  };
+
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
     AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
                   Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags);
   }
 
+  static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
+    { X86::BSF16rr,         X86::BSF16rm,             0 },
+    { X86::BSF32rr,         X86::BSF32rm,             0 },
+    { X86::BSF64rr,         X86::BSF64rm,             0 },
+    { X86::BSR16rr,         X86::BSR16rm,             0 },
+    { X86::BSR32rr,         X86::BSR32rm,             0 },
+    { X86::BSR64rr,         X86::BSR64rm,             0 },
+    { X86::CMP16rr,         X86::CMP16rm,             0 },
+    { X86::CMP32rr,         X86::CMP32rm,             0 },
+    { X86::CMP64rr,         X86::CMP64rm,             0 },
+    { X86::CMP8rr,          X86::CMP8rm,              0 },
+    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm,          0 },
+    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm,        0 },
+    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm,          0 },
+    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm,        0 },
+    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm,          0 },
+    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm,          0 },
+    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm,       0 },
+    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm,         0 },
+    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm,       0 },
+    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm,         0 },
+    { X86::IMUL16rri,       X86::IMUL16rmi,           0 },
+    { X86::IMUL16rri8,      X86::IMUL16rmi8,          0 },
+    { X86::IMUL32rri,       X86::IMUL32rmi,           0 },
+    { X86::IMUL32rri8,      X86::IMUL32rmi8,          0 },
+    { X86::IMUL64rri32,     X86::IMUL64rmi32,         0 },
+    { X86::IMUL64rri8,      X86::IMUL64rmi8,          0 },
+    { X86::Int_COMISDrr,    X86::Int_COMISDrm,        TB_NO_REVERSE },
+    { X86::Int_COMISSrr,    X86::Int_COMISSrm,        TB_NO_REVERSE },
+    { X86::CVTSD2SI64rr,    X86::CVTSD2SI64rm,        TB_NO_REVERSE },
+    { X86::CVTSD2SIrr,      X86::CVTSD2SIrm,          TB_NO_REVERSE },
+    { X86::CVTSS2SI64rr,    X86::CVTSS2SI64rm,        TB_NO_REVERSE },
+    { X86::CVTSS2SIrr,      X86::CVTSS2SIrm,          TB_NO_REVERSE },
+    { X86::CVTDQ2PDrr,      X86::CVTDQ2PDrm,          TB_NO_REVERSE },
+    { X86::CVTDQ2PSrr,      X86::CVTDQ2PSrm,          TB_ALIGN_16 },
+    { X86::CVTPD2DQrr,      X86::CVTPD2DQrm,          TB_ALIGN_16 },
+    { X86::CVTPD2PSrr,      X86::CVTPD2PSrm,          TB_ALIGN_16 },
+    { X86::CVTPS2DQrr,      X86::CVTPS2DQrm,          TB_ALIGN_16 },
+    { X86::CVTPS2PDrr,      X86::CVTPS2PDrm,          TB_NO_REVERSE },
+    { X86::CVTTPD2DQrr,     X86::CVTTPD2DQrm,         TB_ALIGN_16 },
+    { X86::CVTTPS2DQrr,     X86::CVTTPS2DQrm,         TB_ALIGN_16 },
+    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm,  TB_NO_REVERSE },
+    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm,     TB_NO_REVERSE },
+    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm,  TB_NO_REVERSE },
+    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm,     TB_NO_REVERSE },
+    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm,       TB_NO_REVERSE },
+    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm,       TB_NO_REVERSE },
+    { X86::MOV16rr,         X86::MOV16rm,             0 },
+    { X86::MOV32rr,         X86::MOV32rm,             0 },
+    { X86::MOV64rr,         X86::MOV64rm,             0 },
+    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm,         0 },
+    { X86::MOV64toSDrr,     X86::MOV64toSDrm,         0 },
+    { X86::MOV8rr,          X86::MOV8rm,              0 },
+    { X86::MOVAPDrr,        X86::MOVAPDrm,            TB_ALIGN_16 },
+    { X86::MOVAPSrr,        X86::MOVAPSrm,            TB_ALIGN_16 },
+    { X86::MOVDDUPrr,       X86::MOVDDUPrm,           TB_NO_REVERSE },
+    { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm,         0 },
+    { X86::MOVDI2SSrr,      X86::MOVDI2SSrm,          0 },
+    { X86::MOVDQArr,        X86::MOVDQArm,            TB_ALIGN_16 },
+    { X86::MOVDQUrr,        X86::MOVDQUrm,            0 },
+    { X86::MOVSHDUPrr,      X86::MOVSHDUPrm,          TB_ALIGN_16 },
+    { X86::MOVSLDUPrr,      X86::MOVSLDUPrm,          TB_ALIGN_16 },
+    { X86::MOVSX16rr8,      X86::MOVSX16rm8,          0 },
+    { X86::MOVSX32rr16,     X86::MOVSX32rm16,         0 },
+    { X86::MOVSX32rr8,      X86::MOVSX32rm8,          0 },
+    { X86::MOVSX64rr16,     X86::MOVSX64rm16,         0 },
+    { X86::MOVSX64rr32,     X86::MOVSX64rm32,         0 },
+    { X86::MOVSX64rr8,      X86::MOVSX64rm8,          0 },
+    { X86::MOVUPDrr,        X86::MOVUPDrm,            0 },
+    { X86::MOVUPSrr,        X86::MOVUPSrm,            0 },
+    { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm,         TB_NO_REVERSE },
+    { X86::MOVZX16rr8,      X86::MOVZX16rm8,          0 },
+    { X86::MOVZX32rr16,     X86::MOVZX32rm16,         0 },
+    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8,   0 },
+    { X86::MOVZX32rr8,      X86::MOVZX32rm8,          0 },
+    { X86::PABSBrr,         X86::PABSBrm,             TB_ALIGN_16 },
+    { X86::PABSDrr,         X86::PABSDrm,             TB_ALIGN_16 },
+    { X86::PABSWrr,         X86::PABSWrm,             TB_ALIGN_16 },
+    { X86::PCMPESTRIrr,     X86::PCMPESTRIrm,         TB_ALIGN_16 },
+    { X86::PCMPESTRM128rr,  X86::PCMPESTRM128rm,      TB_ALIGN_16 },
+    { X86::PCMPISTRIrr,     X86::PCMPISTRIrm,         TB_ALIGN_16 },
+    { X86::PCMPISTRM128rr,  X86::PCMPISTRM128rm,      TB_ALIGN_16 },
+    { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128,     TB_ALIGN_16 },
+    { X86::PMOVSXBDrr,      X86::PMOVSXBDrm,          TB_NO_REVERSE },
+    { X86::PMOVSXBQrr,      X86::PMOVSXBQrm,          TB_NO_REVERSE },
+    { X86::PMOVSXBWrr,      X86::PMOVSXBWrm,          TB_NO_REVERSE },
+    { X86::PMOVSXDQrr,      X86::PMOVSXDQrm,          TB_NO_REVERSE },
+    { X86::PMOVSXWDrr,      X86::PMOVSXWDrm,          TB_NO_REVERSE },
+    { X86::PMOVSXWQrr,      X86::PMOVSXWQrm,          TB_NO_REVERSE },
+    { X86::PMOVZXBDrr,      X86::PMOVZXBDrm,          TB_NO_REVERSE },
+    { X86::PMOVZXBQrr,      X86::PMOVZXBQrm,          TB_NO_REVERSE },
+    { X86::PMOVZXBWrr,      X86::PMOVZXBWrm,          TB_NO_REVERSE },
+    { X86::PMOVZXDQrr,      X86::PMOVZXDQrm,          TB_NO_REVERSE },
+    { X86::PMOVZXWDrr,      X86::PMOVZXWDrm,          TB_NO_REVERSE },
+    { X86::PMOVZXWQrr,      X86::PMOVZXWQrm,          TB_NO_REVERSE },
+    { X86::PSHUFDri,        X86::PSHUFDmi,            TB_ALIGN_16 },
+    { X86::PSHUFHWri,       X86::PSHUFHWmi,           TB_ALIGN_16 },
+    { X86::PSHUFLWri,       X86::PSHUFLWmi,           TB_ALIGN_16 },
+    { X86::PTESTrr,         X86::PTESTrm,             TB_ALIGN_16 },
+    { X86::RCPPSr,          X86::RCPPSm,              TB_ALIGN_16 },
+    { X86::RCPSSr,          X86::RCPSSm,              0 },
+    { X86::RCPSSr_Int,      X86::RCPSSm_Int,          TB_NO_REVERSE },
+    { X86::ROUNDPDr,        X86::ROUNDPDm,            TB_ALIGN_16 },
+    { X86::ROUNDPSr,        X86::ROUNDPSm,            TB_ALIGN_16 },
+    { X86::ROUNDSDr,        X86::ROUNDSDm,            0 },
+    { X86::ROUNDSSr,        X86::ROUNDSSm,            0 },
+    { X86::RSQRTPSr,        X86::RSQRTPSm,            TB_ALIGN_16 },
+    { X86::RSQRTSSr,        X86::RSQRTSSm,            0 },
+    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int,        TB_NO_REVERSE },
+    { X86::SQRTPDr,         X86::SQRTPDm,             TB_ALIGN_16 },
+    { X86::SQRTPSr,         X86::SQRTPSm,             TB_ALIGN_16 },
+    { X86::SQRTSDr,         X86::SQRTSDm,             0 },
+    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int,         TB_NO_REVERSE },
+    { X86::SQRTSSr,         X86::SQRTSSm,             0 },
+    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int,         TB_NO_REVERSE },
+    { X86::TEST16rr,        X86::TEST16rm,            0 },
+    { X86::TEST32rr,        X86::TEST32rm,            0 },
+    { X86::TEST64rr,        X86::TEST64rm,            0 },
+    { X86::TEST8rr,         X86::TEST8rm,             0 },
+    // FIXME: TEST*rr EAX,EAX (same register twice) could fold to CMP [mem], 0.
+    { X86::UCOMISDrr,       X86::UCOMISDrm,           0 },
+    { X86::UCOMISSrr,       X86::UCOMISSrm,           0 },
+
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPD2PIirr,   X86::MMX_CVTPD2PIirm,   0 },
+    { X86::MMX_CVTPI2PDirr,   X86::MMX_CVTPI2PDirm,   0 },
+    { X86::MMX_CVTPS2PIirr,   X86::MMX_CVTPS2PIirm,   0 },
+    { X86::MMX_CVTTPD2PIirr,  X86::MMX_CVTTPD2PIirm,  0 },
+    { X86::MMX_CVTTPS2PIirr,  X86::MMX_CVTTPS2PIirm,  0 },
+    { X86::MMX_MOVD64to64rr,  X86::MMX_MOVQ64rm,      0 },
+    { X86::MMX_PABSBrr64,     X86::MMX_PABSBrm64,     0 },
+    { X86::MMX_PABSDrr64,     X86::MMX_PABSDrm64,     0 },
+    { X86::MMX_PABSWrr64,     X86::MMX_PABSWrm64,     0 },
+    { X86::MMX_PSHUFWri,      X86::MMX_PSHUFWmi,      0 },
+
+    // 3DNow! version of foldable instructions
+    { X86::PF2IDrr,         X86::PF2IDrm,             0 },
+    { X86::PF2IWrr,         X86::PF2IWrm,             0 },
+    { X86::PFRCPrr,         X86::PFRCPrm,             0 },
+    { X86::PFRSQRTrr,       X86::PFRSQRTrm,           0 },
+    { X86::PI2FDrr,         X86::PI2FDrm,             0 },
+    { X86::PI2FWrr,         X86::PI2FWrm,             0 },
+    { X86::PSWAPDrr,        X86::PSWAPDrm,            0 },
+
+    // AVX 128-bit versions of foldable instructions
+    { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       TB_NO_REVERSE },
+    { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       TB_NO_REVERSE },
+    { X86::Int_VUCOMISDrr,  X86::Int_VUCOMISDrm,      TB_NO_REVERSE },
+    { X86::Int_VUCOMISSrr,  X86::Int_VUCOMISSrm,      TB_NO_REVERSE },
+    { X86::VCVTTSD2SI64rr,  X86::VCVTTSD2SI64rm,      0 },
+    { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE },
+    { X86::VCVTTSD2SIrr,    X86::VCVTTSD2SIrm,        0 },
+    { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm,    TB_NO_REVERSE },
+    { X86::VCVTTSS2SI64rr,  X86::VCVTTSS2SI64rm,      0 },
+    { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE },
+    { X86::VCVTTSS2SIrr,    X86::VCVTTSS2SIrm,        0 },
+    { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm,    TB_NO_REVERSE },
+    { X86::VCVTSD2SI64rr,   X86::VCVTSD2SI64rm,       TB_NO_REVERSE },
+    { X86::VCVTSD2SIrr,     X86::VCVTSD2SIrm,         TB_NO_REVERSE },
+    { X86::VCVTSS2SI64rr,   X86::VCVTSS2SI64rm,       TB_NO_REVERSE },
+    { X86::VCVTSS2SIrr,     X86::VCVTSS2SIrm,         TB_NO_REVERSE },
+    { X86::VCVTDQ2PDrr,     X86::VCVTDQ2PDrm,         TB_NO_REVERSE },
+    { X86::VCVTDQ2PSrr,     X86::VCVTDQ2PSrm,         0 },
+    { X86::VCVTPD2DQrr,     X86::VCVTPD2DQrm,         0 },
+    { X86::VCVTPD2PSrr,     X86::VCVTPD2PSrm,         0 },
+    { X86::VCVTPS2DQrr,     X86::VCVTPS2DQrm,         0 },
+    { X86::VCVTPS2PDrr,     X86::VCVTPS2PDrm,         TB_NO_REVERSE },
+    { X86::VCVTTPD2DQrr,    X86::VCVTTPD2DQrm,        0 },
+    { X86::VCVTTPS2DQrr,    X86::VCVTTPS2DQrm,        0 },
+    { X86::VMOV64toPQIrr,   X86::VMOVQI2PQIrm,        0 },
+    { X86::VMOV64toSDrr,    X86::VMOV64toSDrm,        0 },
+    { X86::VMOVAPDrr,       X86::VMOVAPDrm,           TB_ALIGN_16 },
+    { X86::VMOVAPSrr,       X86::VMOVAPSrm,           TB_ALIGN_16 },
+    { X86::VMOVDDUPrr,      X86::VMOVDDUPrm,          TB_NO_REVERSE },
+    { X86::VMOVDI2PDIrr,    X86::VMOVDI2PDIrm,        0 },
+    { X86::VMOVDI2SSrr,     X86::VMOVDI2SSrm,         0 },
+    { X86::VMOVDQArr,       X86::VMOVDQArm,           TB_ALIGN_16 },
+    { X86::VMOVDQUrr,       X86::VMOVDQUrm,           0 },
+    { X86::VMOVSLDUPrr,     X86::VMOVSLDUPrm,         0 },
+    { X86::VMOVSHDUPrr,     X86::VMOVSHDUPrm,         0 },
+    { X86::VMOVUPDrr,       X86::VMOVUPDrm,           0 },
+    { X86::VMOVUPSrr,       X86::VMOVUPSrm,           0 },
+    { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm,        TB_NO_REVERSE },
+    { X86::VPABSBrr,        X86::VPABSBrm,            0 },
+    { X86::VPABSDrr,        X86::VPABSDrm,            0 },
+    { X86::VPABSWrr,        X86::VPABSWrm,            0 },
+    { X86::VPCMPESTRIrr,    X86::VPCMPESTRIrm,        0 },
+    { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm,     0 },
+    { X86::VPCMPISTRIrr,    X86::VPCMPISTRIrm,        0 },
+    { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm,     0 },
+    { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128,   0 },
+    { X86::VPERMILPDri,     X86::VPERMILPDmi,         0 },
+    { X86::VPERMILPSri,     X86::VPERMILPSmi,         0 },
+    { X86::VPMOVSXBDrr,     X86::VPMOVSXBDrm,         TB_NO_REVERSE },
+    { X86::VPMOVSXBQrr,     X86::VPMOVSXBQrm,         TB_NO_REVERSE },
+    { X86::VPMOVSXBWrr,     X86::VPMOVSXBWrm,         TB_NO_REVERSE },
+    { X86::VPMOVSXDQrr,     X86::VPMOVSXDQrm,         TB_NO_REVERSE },
+    { X86::VPMOVSXWDrr,     X86::VPMOVSXWDrm,         TB_NO_REVERSE },
+    { X86::VPMOVSXWQrr,     X86::VPMOVSXWQrm,         TB_NO_REVERSE },
+    { X86::VPMOVZXBDrr,     X86::VPMOVZXBDrm,         TB_NO_REVERSE },
+    { X86::VPMOVZXBQrr,     X86::VPMOVZXBQrm,         TB_NO_REVERSE },
+    { X86::VPMOVZXBWrr,     X86::VPMOVZXBWrm,         TB_NO_REVERSE },
+    { X86::VPMOVZXDQrr,     X86::VPMOVZXDQrm,         TB_NO_REVERSE },
+    { X86::VPMOVZXWDrr,     X86::VPMOVZXWDrm,         TB_NO_REVERSE },
+    { X86::VPMOVZXWQrr,     X86::VPMOVZXWQrm,         TB_NO_REVERSE },
+    { X86::VPSHUFDri,       X86::VPSHUFDmi,           0 },
+    { X86::VPSHUFHWri,      X86::VPSHUFHWmi,          0 },
+    { X86::VPSHUFLWri,      X86::VPSHUFLWmi,          0 },
+    { X86::VPTESTrr,        X86::VPTESTrm,            0 },
+    { X86::VRCPPSr,         X86::VRCPPSm,             0 },
+    { X86::VROUNDPDr,       X86::VROUNDPDm,           0 },
+    { X86::VROUNDPSr,       X86::VROUNDPSm,           0 },
+    { X86::VRSQRTPSr,       X86::VRSQRTPSm,           0 },
+    { X86::VSQRTPDr,        X86::VSQRTPDm,            0 },
+    { X86::VSQRTPSr,        X86::VSQRTPSm,            0 },
+    { X86::VTESTPDrr,       X86::VTESTPDrm,           0 },
+    { X86::VTESTPSrr,       X86::VTESTPSrm,           0 },
+    { X86::VUCOMISDrr,      X86::VUCOMISDrm,          0 },
+    { X86::VUCOMISSrr,      X86::VUCOMISSrm,          0 },
+
+    // AVX 256-bit foldable instructions
+    { X86::VCVTDQ2PDYrr,    X86::VCVTDQ2PDYrm,        TB_NO_REVERSE },
+    { X86::VCVTDQ2PSYrr,    X86::VCVTDQ2PSYrm,        0 },
+    { X86::VCVTPD2DQYrr,    X86::VCVTPD2DQYrm,        0 },
+    { X86::VCVTPD2PSYrr,    X86::VCVTPD2PSYrm,        0 },
+    { X86::VCVTPS2DQYrr,    X86::VCVTPS2DQYrm,        0 },
+    { X86::VCVTPS2PDYrr,    X86::VCVTPS2PDYrm,        TB_NO_REVERSE },
+    { X86::VCVTTPD2DQYrr,   X86::VCVTTPD2DQYrm,       0 },
+    { X86::VCVTTPS2DQYrr,   X86::VCVTTPS2DQYrm,       0 },
+    { X86::VMOVAPDYrr,      X86::VMOVAPDYrm,          TB_ALIGN_32 },
+    { X86::VMOVAPSYrr,      X86::VMOVAPSYrm,          TB_ALIGN_32 },
+    { X86::VMOVDDUPYrr,     X86::VMOVDDUPYrm,         0 },
+    { X86::VMOVDQAYrr,      X86::VMOVDQAYrm,          TB_ALIGN_32 },
+    { X86::VMOVDQUYrr,      X86::VMOVDQUYrm,          0 },
+    { X86::VMOVSLDUPYrr,    X86::VMOVSLDUPYrm,        0 },
+    { X86::VMOVSHDUPYrr,    X86::VMOVSHDUPYrm,        0 },
+    { X86::VMOVUPDYrr,      X86::VMOVUPDYrm,          0 },
+    { X86::VMOVUPSYrr,      X86::VMOVUPSYrm,          0 },
+    { X86::VPERMILPDYri,    X86::VPERMILPDYmi,        0 },
+    { X86::VPERMILPSYri,    X86::VPERMILPSYmi,        0 },
+    { X86::VPTESTYrr,       X86::VPTESTYrm,           0 },
+    { X86::VRCPPSYr,        X86::VRCPPSYm,            0 },
+    { X86::VROUNDYPDr,      X86::VROUNDYPDm,          0 },
+    { X86::VROUNDYPSr,      X86::VROUNDYPSm,          0 },
+    { X86::VRSQRTPSYr,      X86::VRSQRTPSYm,          0 },
+    { X86::VSQRTPDYr,       X86::VSQRTPDYm,           0 },
+    { X86::VSQRTPSYr,       X86::VSQRTPSYm,           0 },
+    { X86::VTESTPDYrr,      X86::VTESTPDYrm,          0 },
+    { X86::VTESTPSYrr,      X86::VTESTPSYrm,          0 },
+
+    // AVX2 foldable instructions
+
+    // The VBROADCASTS{S,D} register forms (rr) were added in AVX2, while the
+    // memory forms (rm) have been available since AVX1. TB_NO_REVERSE keeps
+    // unfolding from introducing an instruction that is illegal on AVX1
+    // targets. The VPBROADCAST instructions are all AVX2, so that particular
+    // concern does not apply, though they are also marked TB_NO_REVERSE below.
+    { X86::VBROADCASTSSrr,  X86::VBROADCASTSSrm,      TB_NO_REVERSE },
+    { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm,     TB_NO_REVERSE },
+    { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm,     TB_NO_REVERSE },
+    { X86::VPABSBYrr,       X86::VPABSBYrm,           0 },
+    { X86::VPABSDYrr,       X86::VPABSDYrm,           0 },
+    { X86::VPABSWYrr,       X86::VPABSWYrm,           0 },
+    { X86::VPBROADCASTBrr,  X86::VPBROADCASTBrm,      TB_NO_REVERSE },
+    { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm,     TB_NO_REVERSE },
+    { X86::VPBROADCASTDrr,  X86::VPBROADCASTDrm,      TB_NO_REVERSE },
+    { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm,     TB_NO_REVERSE },
+    { X86::VPBROADCASTQrr,  X86::VPBROADCASTQrm,      TB_NO_REVERSE },
+    { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm,     TB_NO_REVERSE },
+    { X86::VPBROADCASTWrr,  X86::VPBROADCASTWrm,      TB_NO_REVERSE },
+    { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm,     TB_NO_REVERSE },
+    { X86::VPERMPDYri,      X86::VPERMPDYmi,          0 },
+    { X86::VPERMQYri,       X86::VPERMQYmi,           0 },
+    { X86::VPMOVSXBDYrr,    X86::VPMOVSXBDYrm,        TB_NO_REVERSE },
+    { X86::VPMOVSXBQYrr,    X86::VPMOVSXBQYrm,        TB_NO_REVERSE },
+    { X86::VPMOVSXBWYrr,    X86::VPMOVSXBWYrm,        0 },
+    { X86::VPMOVSXDQYrr,    X86::VPMOVSXDQYrm,        0 },
+    { X86::VPMOVSXWDYrr,    X86::VPMOVSXWDYrm,        0 },
+    { X86::VPMOVSXWQYrr,    X86::VPMOVSXWQYrm,        TB_NO_REVERSE },
+    { X86::VPMOVZXBDYrr,    X86::VPMOVZXBDYrm,        TB_NO_REVERSE },
+    { X86::VPMOVZXBQYrr,    X86::VPMOVZXBQYrm,        TB_NO_REVERSE },
+    { X86::VPMOVZXBWYrr,    X86::VPMOVZXBWYrm,        0 },
+    { X86::VPMOVZXDQYrr,    X86::VPMOVZXDQYrm,        0 },
+    { X86::VPMOVZXWDYrr,    X86::VPMOVZXWDYrm,        0 },
+    { X86::VPMOVZXWQYrr,    X86::VPMOVZXWQYrm,        TB_NO_REVERSE },
+    { X86::VPSHUFDYri,      X86::VPSHUFDYmi,          0 },
+    { X86::VPSHUFHWYri,     X86::VPSHUFHWYmi,         0 },
+    { X86::VPSHUFLWYri,     X86::VPSHUFLWYmi,         0 },
+
+    // XOP foldable instructions
+    { X86::VFRCZPDrr,          X86::VFRCZPDrm,        0 },
+    { X86::VFRCZPDrrY,         X86::VFRCZPDrmY,       0 },
+    { X86::VFRCZPSrr,          X86::VFRCZPSrm,        0 },
+    { X86::VFRCZPSrrY,         X86::VFRCZPSrmY,       0 },
+    { X86::VFRCZSDrr,          X86::VFRCZSDrm,        0 },
+    { X86::VFRCZSSrr,          X86::VFRCZSSrm,        0 },
+    { X86::VPHADDBDrr,         X86::VPHADDBDrm,       0 },
+    { X86::VPHADDBQrr,         X86::VPHADDBQrm,       0 },
+    { X86::VPHADDBWrr,         X86::VPHADDBWrm,       0 },
+    { X86::VPHADDDQrr,         X86::VPHADDDQrm,       0 },
+    { X86::VPHADDWDrr,         X86::VPHADDWDrm,       0 },
+    { X86::VPHADDWQrr,         X86::VPHADDWQrm,       0 },
+    { X86::VPHADDUBDrr,        X86::VPHADDUBDrm,      0 },
+    { X86::VPHADDUBQrr,        X86::VPHADDUBQrm,      0 },
+    { X86::VPHADDUBWrr,        X86::VPHADDUBWrm,      0 },
+    { X86::VPHADDUDQrr,        X86::VPHADDUDQrm,      0 },
+    { X86::VPHADDUWDrr,        X86::VPHADDUWDrm,      0 },
+    { X86::VPHADDUWQrr,        X86::VPHADDUWQrm,      0 },
+    { X86::VPHSUBBWrr,         X86::VPHSUBBWrm,       0 },
+    { X86::VPHSUBDQrr,         X86::VPHSUBDQrm,       0 },
+    { X86::VPHSUBWDrr,         X86::VPHSUBWDrm,       0 },
+    { X86::VPROTBri,           X86::VPROTBmi,         0 },
+    { X86::VPROTBrr,           X86::VPROTBmr,         0 },
+    { X86::VPROTDri,           X86::VPROTDmi,         0 },
+    { X86::VPROTDrr,           X86::VPROTDmr,         0 },
+    { X86::VPROTQri,           X86::VPROTQmi,         0 },
+    { X86::VPROTQrr,           X86::VPROTQmr,         0 },
+    { X86::VPROTWri,           X86::VPROTWmi,         0 },
+    { X86::VPROTWrr,           X86::VPROTWmr,         0 },
+    { X86::VPSHABrr,           X86::VPSHABmr,         0 },
+    { X86::VPSHADrr,           X86::VPSHADmr,         0 },
+    { X86::VPSHAQrr,           X86::VPSHAQmr,         0 },
+    { X86::VPSHAWrr,           X86::VPSHAWmr,         0 },
+    { X86::VPSHLBrr,           X86::VPSHLBmr,         0 },
+    { X86::VPSHLDrr,           X86::VPSHLDmr,         0 },
+    { X86::VPSHLQrr,           X86::VPSHLQmr,         0 },
+    { X86::VPSHLWrr,           X86::VPSHLWmr,         0 },
+
+    // LWP foldable instructions
+    { X86::LWPINS32rri,        X86::LWPINS32rmi,      0 },
+    { X86::LWPINS64rri,        X86::LWPINS64rmi,      0 },
+    { X86::LWPVAL32rri,        X86::LWPVAL32rmi,      0 },
+    { X86::LWPVAL64rri,        X86::LWPVAL64rmi,      0 },
+
+    // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
+    { X86::BEXTR32rr,       X86::BEXTR32rm,           0 },
+    { X86::BEXTR64rr,       X86::BEXTR64rm,           0 },
+    { X86::BEXTRI32ri,      X86::BEXTRI32mi,          0 },
+    { X86::BEXTRI64ri,      X86::BEXTRI64mi,          0 },
+    { X86::BLCFILL32rr,     X86::BLCFILL32rm,         0 },
+    { X86::BLCFILL64rr,     X86::BLCFILL64rm,         0 },
+    { X86::BLCI32rr,        X86::BLCI32rm,            0 },
+    { X86::BLCI64rr,        X86::BLCI64rm,            0 },
+    { X86::BLCIC32rr,       X86::BLCIC32rm,           0 },
+    { X86::BLCIC64rr,       X86::BLCIC64rm,           0 },
+    { X86::BLCMSK32rr,      X86::BLCMSK32rm,          0 },
+    { X86::BLCMSK64rr,      X86::BLCMSK64rm,          0 },
+    { X86::BLCS32rr,        X86::BLCS32rm,            0 },
+    { X86::BLCS64rr,        X86::BLCS64rm,            0 },
+    { X86::BLSFILL32rr,     X86::BLSFILL32rm,         0 },
+    { X86::BLSFILL64rr,     X86::BLSFILL64rm,         0 },
+    { X86::BLSI32rr,        X86::BLSI32rm,            0 },
+    { X86::BLSI64rr,        X86::BLSI64rm,            0 },
+    { X86::BLSIC32rr,       X86::BLSIC32rm,           0 },
+    { X86::BLSIC64rr,       X86::BLSIC64rm,           0 },
+    { X86::BLSMSK32rr,      X86::BLSMSK32rm,          0 },
+    { X86::BLSMSK64rr,      X86::BLSMSK64rm,          0 },
+    { X86::BLSR32rr,        X86::BLSR32rm,            0 },
+    { X86::BLSR64rr,        X86::BLSR64rm,            0 },
+    { X86::BZHI32rr,        X86::BZHI32rm,            0 },
+    { X86::BZHI64rr,        X86::BZHI64rm,            0 },
+    { X86::LZCNT16rr,       X86::LZCNT16rm,           0 },
+    { X86::LZCNT32rr,       X86::LZCNT32rm,           0 },
+    { X86::LZCNT64rr,       X86::LZCNT64rm,           0 },
+    { X86::POPCNT16rr,      X86::POPCNT16rm,          0 },
+    { X86::POPCNT32rr,      X86::POPCNT32rm,          0 },
+    { X86::POPCNT64rr,      X86::POPCNT64rm,          0 },
+    { X86::RORX32ri,        X86::RORX32mi,            0 },
+    { X86::RORX64ri,        X86::RORX64mi,            0 },
+    { X86::SARX32rr,        X86::SARX32rm,            0 },
+    { X86::SARX64rr,        X86::SARX64rm,            0 },
+    { X86::SHRX32rr,        X86::SHRX32rm,            0 },
+    { X86::SHRX64rr,        X86::SHRX64rm,            0 },
+    { X86::SHLX32rr,        X86::SHLX32rm,            0 },
+    { X86::SHLX64rr,        X86::SHLX64rm,            0 },
+    { X86::T1MSKC32rr,      X86::T1MSKC32rm,          0 },
+    { X86::T1MSKC64rr,      X86::T1MSKC64rm,          0 },
+    { X86::TZCNT16rr,       X86::TZCNT16rm,           0 },
+    { X86::TZCNT32rr,       X86::TZCNT32rm,           0 },
+    { X86::TZCNT64rr,       X86::TZCNT64rm,           0 },
+    { X86::TZMSK32rr,       X86::TZMSK32rm,           0 },
+    { X86::TZMSK64rr,       X86::TZMSK64rm,           0 },
+
+    // AVX-512 foldable instructions
+    { X86::VBROADCASTSSZr,   X86::VBROADCASTSSZm,     TB_NO_REVERSE },
+    { X86::VBROADCASTSDZr,   X86::VBROADCASTSDZm,     TB_NO_REVERSE },
+    { X86::VMOV64toPQIZrr,   X86::VMOVQI2PQIZrm,      0 },
+    { X86::VMOV64toSDZrr,    X86::VMOV64toSDZrm,      0 },
+    { X86::VMOVDI2PDIZrr,    X86::VMOVDI2PDIZrm,      0 },
+    { X86::VMOVDI2SSZrr,     X86::VMOVDI2SSZrm,       0 },
+    { X86::VMOVAPDZrr,       X86::VMOVAPDZrm,         TB_ALIGN_64 },
+    { X86::VMOVAPSZrr,       X86::VMOVAPSZrm,         TB_ALIGN_64 },
+    { X86::VMOVDQA32Zrr,     X86::VMOVDQA32Zrm,       TB_ALIGN_64 },
+    { X86::VMOVDQA64Zrr,     X86::VMOVDQA64Zrm,       TB_ALIGN_64 },
+    { X86::VMOVDQU8Zrr,      X86::VMOVDQU8Zrm,        0 },
+    { X86::VMOVDQU16Zrr,     X86::VMOVDQU16Zrm,       0 },
+    { X86::VMOVDQU32Zrr,     X86::VMOVDQU32Zrm,       0 },
+    { X86::VMOVDQU64Zrr,     X86::VMOVDQU64Zrm,       0 },
+    { X86::VMOVUPDZrr,       X86::VMOVUPDZrm,         0 },
+    { X86::VMOVUPSZrr,       X86::VMOVUPSZrm,         0 },
+    { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm,      TB_NO_REVERSE },
+    { X86::VPABSBZrr,        X86::VPABSBZrm,          0 },
+    { X86::VPABSDZrr,        X86::VPABSDZrm,          0 },
+    { X86::VPABSQZrr,        X86::VPABSQZrm,          0 },
+    { X86::VPABSWZrr,        X86::VPABSWZrm,          0 },
+    { X86::VPERMILPDZri,     X86::VPERMILPDZmi,       0 },
+    { X86::VPERMILPSZri,     X86::VPERMILPSZmi,       0 },
+    { X86::VPERMPDZri,       X86::VPERMPDZmi,         0 },
+    { X86::VPERMQZri,        X86::VPERMQZmi,          0 },
+    { X86::VPMOVSXBDZrr,     X86::VPMOVSXBDZrm,       0 },
+    { X86::VPMOVSXBQZrr,     X86::VPMOVSXBQZrm,       TB_NO_REVERSE },
+    { X86::VPMOVSXBWZrr,     X86::VPMOVSXBWZrm,       0 },
+    { X86::VPMOVSXDQZrr,     X86::VPMOVSXDQZrm,       0 },
+    { X86::VPMOVSXWDZrr,     X86::VPMOVSXWDZrm,       0 },
+    { X86::VPMOVSXWQZrr,     X86::VPMOVSXWQZrm,       0 },
+    { X86::VPMOVZXBDZrr,     X86::VPMOVZXBDZrm,       0 },
+    { X86::VPMOVZXBQZrr,     X86::VPMOVZXBQZrm,       TB_NO_REVERSE },
+    { X86::VPMOVZXBWZrr,     X86::VPMOVZXBWZrm,       0 },
+    { X86::VPMOVZXDQZrr,     X86::VPMOVZXDQZrm,       0 },
+    { X86::VPMOVZXWDZrr,     X86::VPMOVZXWDZrm,       0 },
+    { X86::VPMOVZXWQZrr,     X86::VPMOVZXWQZrm,       0 },
+    { X86::VPOPCNTDZrr,      X86::VPOPCNTDZrm,        0 },
+    { X86::VPOPCNTQZrr,      X86::VPOPCNTQZrm,        0 },
+    { X86::VPSHUFDZri,       X86::VPSHUFDZmi,         0 },
+    { X86::VPSHUFHWZri,      X86::VPSHUFHWZmi,        0 },
+    { X86::VPSHUFLWZri,      X86::VPSHUFLWZmi,        0 },
+    { X86::VPSLLDQZ512rr,    X86::VPSLLDQZ512rm,      0 },
+    { X86::VPSLLDZri,        X86::VPSLLDZmi,          0 },
+    { X86::VPSLLQZri,        X86::VPSLLQZmi,          0 },
+    { X86::VPSLLWZri,        X86::VPSLLWZmi,          0 },
+    { X86::VPSRADZri,        X86::VPSRADZmi,          0 },
+    { X86::VPSRAQZri,        X86::VPSRAQZmi,          0 },
+    { X86::VPSRAWZri,        X86::VPSRAWZmi,          0 },
+    { X86::VPSRLDQZ512rr,    X86::VPSRLDQZ512rm,      0 },
+    { X86::VPSRLDZri,        X86::VPSRLDZmi,          0 },
+    { X86::VPSRLQZri,        X86::VPSRLQZmi,          0 },
+    { X86::VPSRLWZri,        X86::VPSRLWZmi,          0 },
+
+    // AVX-512 foldable instructions (256-bit versions)
+    { X86::VBROADCASTSSZ256r,    X86::VBROADCASTSSZ256m,    TB_NO_REVERSE },
+    { X86::VBROADCASTSDZ256r,    X86::VBROADCASTSDZ256m,    TB_NO_REVERSE },
+    { X86::VMOVAPDZ256rr,        X86::VMOVAPDZ256rm,        TB_ALIGN_32 },
+    { X86::VMOVAPSZ256rr,        X86::VMOVAPSZ256rm,        TB_ALIGN_32 },
+    { X86::VMOVDQA32Z256rr,      X86::VMOVDQA32Z256rm,      TB_ALIGN_32 },
+    { X86::VMOVDQA64Z256rr,      X86::VMOVDQA64Z256rm,      TB_ALIGN_32 },
+    { X86::VMOVDQU8Z256rr,       X86::VMOVDQU8Z256rm,       0 },
+    { X86::VMOVDQU16Z256rr,      X86::VMOVDQU16Z256rm,      0 },
+    { X86::VMOVDQU32Z256rr,      X86::VMOVDQU32Z256rm,      0 },
+    { X86::VMOVDQU64Z256rr,      X86::VMOVDQU64Z256rm,      0 },
+    { X86::VMOVUPDZ256rr,        X86::VMOVUPDZ256rm,        0 },
+    { X86::VMOVUPSZ256rr,        X86::VMOVUPSZ256rm,        0 },
+    { X86::VPABSBZ256rr,         X86::VPABSBZ256rm,         0 },
+    { X86::VPABSDZ256rr,         X86::VPABSDZ256rm,         0 },
+    { X86::VPABSQZ256rr,         X86::VPABSQZ256rm,         0 },
+    { X86::VPABSWZ256rr,         X86::VPABSWZ256rm,         0 },
+    { X86::VPERMILPDZ256ri,      X86::VPERMILPDZ256mi,      0 },
+    { X86::VPERMILPSZ256ri,      X86::VPERMILPSZ256mi,      0 },
+    { X86::VPERMPDZ256ri,        X86::VPERMPDZ256mi,        0 },
+    { X86::VPERMQZ256ri,         X86::VPERMQZ256mi,         0 },
+    { X86::VPMOVSXBDZ256rr,      X86::VPMOVSXBDZ256rm,      TB_NO_REVERSE },
+    { X86::VPMOVSXBQZ256rr,      X86::VPMOVSXBQZ256rm,      TB_NO_REVERSE },
+    { X86::VPMOVSXBWZ256rr,      X86::VPMOVSXBWZ256rm,      0 },
+    { X86::VPMOVSXDQZ256rr,      X86::VPMOVSXDQZ256rm,      0 },
+    { X86::VPMOVSXWDZ256rr,      X86::VPMOVSXWDZ256rm,      0 },
+    { X86::VPMOVSXWQZ256rr,      X86::VPMOVSXWQZ256rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXBDZ256rr,      X86::VPMOVZXBDZ256rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXBQZ256rr,      X86::VPMOVZXBQZ256rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXBWZ256rr,      X86::VPMOVZXBWZ256rm,      0 },
+    { X86::VPMOVZXDQZ256rr,      X86::VPMOVZXDQZ256rm,      0 },
+    { X86::VPMOVZXWDZ256rr,      X86::VPMOVZXWDZ256rm,      0 },
+    { X86::VPMOVZXWQZ256rr,      X86::VPMOVZXWQZ256rm,      TB_NO_REVERSE },
+    { X86::VPSHUFDZ256ri,        X86::VPSHUFDZ256mi,        0 },
+    { X86::VPSHUFHWZ256ri,       X86::VPSHUFHWZ256mi,       0 },
+    { X86::VPSHUFLWZ256ri,       X86::VPSHUFLWZ256mi,       0 },
+    { X86::VPSLLDQZ256rr,        X86::VPSLLDQZ256rm,        0 },
+    { X86::VPSLLDZ256ri,         X86::VPSLLDZ256mi,         0 },
+    { X86::VPSLLQZ256ri,         X86::VPSLLQZ256mi,         0 },
+    { X86::VPSLLWZ256ri,         X86::VPSLLWZ256mi,         0 },
+    { X86::VPSRADZ256ri,         X86::VPSRADZ256mi,         0 },
+    { X86::VPSRAQZ256ri,         X86::VPSRAQZ256mi,         0 },
+    { X86::VPSRAWZ256ri,         X86::VPSRAWZ256mi,         0 },
+    { X86::VPSRLDQZ256rr,        X86::VPSRLDQZ256rm,        0 },
+    { X86::VPSRLDZ256ri,         X86::VPSRLDZ256mi,         0 },
+    { X86::VPSRLQZ256ri,         X86::VPSRLQZ256mi,         0 },
+    { X86::VPSRLWZ256ri,         X86::VPSRLWZ256mi,         0 },
+
+    // AVX-512 foldable instructions (128-bit versions)
+    { X86::VBROADCASTSSZ128r,    X86::VBROADCASTSSZ128m,    TB_NO_REVERSE },
+    { X86::VMOVAPDZ128rr,        X86::VMOVAPDZ128rm,        TB_ALIGN_16 },
+    { X86::VMOVAPSZ128rr,        X86::VMOVAPSZ128rm,        TB_ALIGN_16 },
+    { X86::VMOVDQA32Z128rr,      X86::VMOVDQA32Z128rm,      TB_ALIGN_16 },
+    { X86::VMOVDQA64Z128rr,      X86::VMOVDQA64Z128rm,      TB_ALIGN_16 },
+    { X86::VMOVDQU8Z128rr,       X86::VMOVDQU8Z128rm,       0 },
+    { X86::VMOVDQU16Z128rr,      X86::VMOVDQU16Z128rm,      0 },
+    { X86::VMOVDQU32Z128rr,      X86::VMOVDQU32Z128rm,      0 },
+    { X86::VMOVDQU64Z128rr,      X86::VMOVDQU64Z128rm,      0 },
+    { X86::VMOVUPDZ128rr,        X86::VMOVUPDZ128rm,        0 },
+    { X86::VMOVUPSZ128rr,        X86::VMOVUPSZ128rm,        0 },
+    { X86::VPABSBZ128rr,         X86::VPABSBZ128rm,         0 },
+    { X86::VPABSDZ128rr,         X86::VPABSDZ128rm,         0 },
+    { X86::VPABSQZ128rr,         X86::VPABSQZ128rm,         0 },
+    { X86::VPABSWZ128rr,         X86::VPABSWZ128rm,         0 },
+    { X86::VPERMILPDZ128ri,      X86::VPERMILPDZ128mi,      0 },
+    { X86::VPERMILPSZ128ri,      X86::VPERMILPSZ128mi,      0 },
+    { X86::VPMOVSXBDZ128rr,      X86::VPMOVSXBDZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVSXBQZ128rr,      X86::VPMOVSXBQZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVSXBWZ128rr,      X86::VPMOVSXBWZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVSXDQZ128rr,      X86::VPMOVSXDQZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVSXWDZ128rr,      X86::VPMOVSXWDZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVSXWQZ128rr,      X86::VPMOVSXWQZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXBDZ128rr,      X86::VPMOVZXBDZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXBQZ128rr,      X86::VPMOVZXBQZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXBWZ128rr,      X86::VPMOVZXBWZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXDQZ128rr,      X86::VPMOVZXDQZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXWDZ128rr,      X86::VPMOVZXWDZ128rm,      TB_NO_REVERSE },
+    { X86::VPMOVZXWQZ128rr,      X86::VPMOVZXWQZ128rm,      TB_NO_REVERSE },
+    { X86::VPSHUFDZ128ri,        X86::VPSHUFDZ128mi,        0 },
+    { X86::VPSHUFHWZ128ri,       X86::VPSHUFHWZ128mi,       0 },
+    { X86::VPSHUFLWZ128ri,       X86::VPSHUFLWZ128mi,       0 },
+    { X86::VPSLLDQZ128rr,        X86::VPSLLDQZ128rm,        0 },
+    { X86::VPSLLDZ128ri,         X86::VPSLLDZ128mi,         0 },
+    { X86::VPSLLQZ128ri,         X86::VPSLLQZ128mi,         0 },
+    { X86::VPSLLWZ128ri,         X86::VPSLLWZ128mi,         0 },
+    { X86::VPSRADZ128ri,         X86::VPSRADZ128mi,         0 },
+    { X86::VPSRAQZ128ri,         X86::VPSRAQZ128mi,         0 },
+    { X86::VPSRAWZ128ri,         X86::VPSRAWZ128mi,         0 },
+    { X86::VPSRLDQZ128rr,        X86::VPSRLDQZ128rm,        0 },
+    { X86::VPSRLDZ128ri,         X86::VPSRLDZ128mi,         0 },
+    { X86::VPSRLQZ128ri,         X86::VPSRLQZ128mi,         0 },
+    { X86::VPSRLWZ128ri,         X86::VPSRLWZ128mi,         0 },
+
+    // F16C foldable instructions
+    { X86::VCVTPH2PSrr,        X86::VCVTPH2PSrm,            0 },
+    { X86::VCVTPH2PSYrr,       X86::VCVTPH2PSYrm,           0 },
+
+    // AES foldable instructions
+    { X86::AESIMCrr,              X86::AESIMCrm,              TB_ALIGN_16 },
+    { X86::AESKEYGENASSIST128rr,  X86::AESKEYGENASSIST128rm,  TB_ALIGN_16 },
+    { X86::VAESIMCrr,             X86::VAESIMCrm,             0 },
+    { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
+  };
+
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) {
     AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
                   Entry.RegOp, Entry.MemOp,
@@ -143,6 +1042,1396 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
                   Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
   }
 
+  static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
+    { X86::ADC32rr,         X86::ADC32rm,       0 },
+    { X86::ADC64rr,         X86::ADC64rm,       0 },
+    { X86::ADD16rr,         X86::ADD16rm,       0 },
+    { X86::ADD16rr_DB,      X86::ADD16rm,       TB_NO_REVERSE },
+    { X86::ADD32rr,         X86::ADD32rm,       0 },
+    { X86::ADD32rr_DB,      X86::ADD32rm,       TB_NO_REVERSE },
+    { X86::ADD64rr,         X86::ADD64rm,       0 },
+    { X86::ADD64rr_DB,      X86::ADD64rm,       TB_NO_REVERSE },
+    { X86::ADD8rr,          X86::ADD8rm,        0 },
+    { X86::ADDPDrr,         X86::ADDPDrm,       TB_ALIGN_16 },
+    { X86::ADDPSrr,         X86::ADDPSrm,       TB_ALIGN_16 },
+    { X86::ADDSDrr,         X86::ADDSDrm,       0 },
+    { X86::ADDSDrr_Int,     X86::ADDSDrm_Int,   TB_NO_REVERSE },
+    { X86::ADDSSrr,         X86::ADDSSrm,       0 },
+    { X86::ADDSSrr_Int,     X86::ADDSSrm_Int,   TB_NO_REVERSE },
+    { X86::ADDSUBPDrr,      X86::ADDSUBPDrm,    TB_ALIGN_16 },
+    { X86::ADDSUBPSrr,      X86::ADDSUBPSrm,    TB_ALIGN_16 },
+    { X86::AND16rr,         X86::AND16rm,       0 },
+    { X86::AND32rr,         X86::AND32rm,       0 },
+    { X86::AND64rr,         X86::AND64rm,       0 },
+    { X86::AND8rr,          X86::AND8rm,        0 },
+    { X86::ANDNPDrr,        X86::ANDNPDrm,      TB_ALIGN_16 },
+    { X86::ANDNPSrr,        X86::ANDNPSrm,      TB_ALIGN_16 },
+    { X86::ANDPDrr,         X86::ANDPDrm,       TB_ALIGN_16 },
+    { X86::ANDPSrr,         X86::ANDPSrm,       TB_ALIGN_16 },
+    { X86::BLENDPDrri,      X86::BLENDPDrmi,    TB_ALIGN_16 },
+    { X86::BLENDPSrri,      X86::BLENDPSrmi,    TB_ALIGN_16 },
+    { X86::BLENDVPDrr0,     X86::BLENDVPDrm0,   TB_ALIGN_16 },
+    { X86::BLENDVPSrr0,     X86::BLENDVPSrm0,   TB_ALIGN_16 },
+    { X86::CMOVA16rr,       X86::CMOVA16rm,     0 },
+    { X86::CMOVA32rr,       X86::CMOVA32rm,     0 },
+    { X86::CMOVA64rr,       X86::CMOVA64rm,     0 },
+    { X86::CMOVAE16rr,      X86::CMOVAE16rm,    0 },
+    { X86::CMOVAE32rr,      X86::CMOVAE32rm,    0 },
+    { X86::CMOVAE64rr,      X86::CMOVAE64rm,    0 },
+    { X86::CMOVB16rr,       X86::CMOVB16rm,     0 },
+    { X86::CMOVB32rr,       X86::CMOVB32rm,     0 },
+    { X86::CMOVB64rr,       X86::CMOVB64rm,     0 },
+    { X86::CMOVBE16rr,      X86::CMOVBE16rm,    0 },
+    { X86::CMOVBE32rr,      X86::CMOVBE32rm,    0 },
+    { X86::CMOVBE64rr,      X86::CMOVBE64rm,    0 },
+    { X86::CMOVE16rr,       X86::CMOVE16rm,     0 },
+    { X86::CMOVE32rr,       X86::CMOVE32rm,     0 },
+    { X86::CMOVE64rr,       X86::CMOVE64rm,     0 },
+    { X86::CMOVG16rr,       X86::CMOVG16rm,     0 },
+    { X86::CMOVG32rr,       X86::CMOVG32rm,     0 },
+    { X86::CMOVG64rr,       X86::CMOVG64rm,     0 },
+    { X86::CMOVGE16rr,      X86::CMOVGE16rm,    0 },
+    { X86::CMOVGE32rr,      X86::CMOVGE32rm,    0 },
+    { X86::CMOVGE64rr,      X86::CMOVGE64rm,    0 },
+    { X86::CMOVL16rr,       X86::CMOVL16rm,     0 },
+    { X86::CMOVL32rr,       X86::CMOVL32rm,     0 },
+    { X86::CMOVL64rr,       X86::CMOVL64rm,     0 },
+    { X86::CMOVLE16rr,      X86::CMOVLE16rm,    0 },
+    { X86::CMOVLE32rr,      X86::CMOVLE32rm,    0 },
+    { X86::CMOVLE64rr,      X86::CMOVLE64rm,    0 },
+    { X86::CMOVNE16rr,      X86::CMOVNE16rm,    0 },
+    { X86::CMOVNE32rr,      X86::CMOVNE32rm,    0 },
+    { X86::CMOVNE64rr,      X86::CMOVNE64rm,    0 },
+    { X86::CMOVNO16rr,      X86::CMOVNO16rm,    0 },
+    { X86::CMOVNO32rr,      X86::CMOVNO32rm,    0 },
+    { X86::CMOVNO64rr,      X86::CMOVNO64rm,    0 },
+    { X86::CMOVNP16rr,      X86::CMOVNP16rm,    0 },
+    { X86::CMOVNP32rr,      X86::CMOVNP32rm,    0 },
+    { X86::CMOVNP64rr,      X86::CMOVNP64rm,    0 },
+    { X86::CMOVNS16rr,      X86::CMOVNS16rm,    0 },
+    { X86::CMOVNS32rr,      X86::CMOVNS32rm,    0 },
+    { X86::CMOVNS64rr,      X86::CMOVNS64rm,    0 },
+    { X86::CMOVO16rr,       X86::CMOVO16rm,     0 },
+    { X86::CMOVO32rr,       X86::CMOVO32rm,     0 },
+    { X86::CMOVO64rr,       X86::CMOVO64rm,     0 },
+    { X86::CMOVP16rr,       X86::CMOVP16rm,     0 },
+    { X86::CMOVP32rr,       X86::CMOVP32rm,     0 },
+    { X86::CMOVP64rr,       X86::CMOVP64rm,     0 },
+    { X86::CMOVS16rr,       X86::CMOVS16rm,     0 },
+    { X86::CMOVS32rr,       X86::CMOVS32rm,     0 },
+    { X86::CMOVS64rr,       X86::CMOVS64rm,     0 },
+    { X86::CMPPDrri,        X86::CMPPDrmi,      TB_ALIGN_16 },
+    { X86::CMPPSrri,        X86::CMPPSrmi,      TB_ALIGN_16 },
+    { X86::CMPSDrr,         X86::CMPSDrm,       0 },
+    { X86::CMPSSrr,         X86::CMPSSrm,       0 },
+    { X86::CRC32r32r32,     X86::CRC32r32m32,   0 },
+    { X86::CRC32r64r64,     X86::CRC32r64m64,   0 },
+    { X86::DIVPDrr,         X86::DIVPDrm,       TB_ALIGN_16 },
+    { X86::DIVPSrr,         X86::DIVPSrm,       TB_ALIGN_16 },
+    { X86::DIVSDrr,         X86::DIVSDrm,       0 },
+    { X86::DIVSDrr_Int,     X86::DIVSDrm_Int,   TB_NO_REVERSE },
+    { X86::DIVSSrr,         X86::DIVSSrm,       0 },
+    { X86::DIVSSrr_Int,     X86::DIVSSrm_Int,   TB_NO_REVERSE },
+    { X86::DPPDrri,         X86::DPPDrmi,       TB_ALIGN_16 },
+    { X86::DPPSrri,         X86::DPPSrmi,       TB_ALIGN_16 },
+    { X86::HADDPDrr,        X86::HADDPDrm,      TB_ALIGN_16 },
+    { X86::HADDPSrr,        X86::HADDPSrm,      TB_ALIGN_16 },
+    { X86::HSUBPDrr,        X86::HSUBPDrm,      TB_ALIGN_16 },
+    { X86::HSUBPSrr,        X86::HSUBPSrm,      TB_ALIGN_16 },
+    { X86::IMUL16rr,        X86::IMUL16rm,      0 },
+    { X86::IMUL32rr,        X86::IMUL32rm,      0 },
+    { X86::IMUL64rr,        X86::IMUL64rm,      0 },
+    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm,   TB_NO_REVERSE },
+    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm,   TB_NO_REVERSE },
+    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm,      TB_NO_REVERSE },
+    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm,    0 },
+    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm,      0 },
+    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm,    0 },
+    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm,      0 },
+    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm,      TB_NO_REVERSE },
+    { X86::MAXPDrr,         X86::MAXPDrm,       TB_ALIGN_16 },
+    { X86::MAXCPDrr,        X86::MAXCPDrm,      TB_ALIGN_16 },
+    { X86::MAXPSrr,         X86::MAXPSrm,       TB_ALIGN_16 },
+    { X86::MAXCPSrr,        X86::MAXCPSrm,      TB_ALIGN_16 },
+    { X86::MAXSDrr,         X86::MAXSDrm,       0 },
+    { X86::MAXCSDrr,        X86::MAXCSDrm,      0 },
+    { X86::MAXSDrr_Int,     X86::MAXSDrm_Int,   TB_NO_REVERSE },
+    { X86::MAXSSrr,         X86::MAXSSrm,       0 },
+    { X86::MAXCSSrr,        X86::MAXCSSrm,      0 },
+    { X86::MAXSSrr_Int,     X86::MAXSSrm_Int,   TB_NO_REVERSE },
+    { X86::MINPDrr,         X86::MINPDrm,       TB_ALIGN_16 },
+    { X86::MINCPDrr,        X86::MINCPDrm,      TB_ALIGN_16 },
+    { X86::MINPSrr,         X86::MINPSrm,       TB_ALIGN_16 },
+    { X86::MINCPSrr,        X86::MINCPSrm,      TB_ALIGN_16 },
+    { X86::MINSDrr,         X86::MINSDrm,       0 },
+    { X86::MINCSDrr,        X86::MINCSDrm,      0 },
+    { X86::MINSDrr_Int,     X86::MINSDrm_Int,   TB_NO_REVERSE },
+    { X86::MINSSrr,         X86::MINSSrm,       0 },
+    { X86::MINCSSrr,        X86::MINCSSrm,      0 },
+    { X86::MINSSrr_Int,     X86::MINSSrm_Int,   TB_NO_REVERSE },
+    { X86::MOVLHPSrr,       X86::MOVHPSrm,      TB_NO_REVERSE },
+    { X86::MPSADBWrri,      X86::MPSADBWrmi,    TB_ALIGN_16 },
+    { X86::MULPDrr,         X86::MULPDrm,       TB_ALIGN_16 },
+    { X86::MULPSrr,         X86::MULPSrm,       TB_ALIGN_16 },
+    { X86::MULSDrr,         X86::MULSDrm,       0 },
+    { X86::MULSDrr_Int,     X86::MULSDrm_Int,   TB_NO_REVERSE },
+    { X86::MULSSrr,         X86::MULSSrm,       0 },
+    { X86::MULSSrr_Int,     X86::MULSSrm_Int,   TB_NO_REVERSE },
+    { X86::OR16rr,          X86::OR16rm,        0 },
+    { X86::OR32rr,          X86::OR32rm,        0 },
+    { X86::OR64rr,          X86::OR64rm,        0 },
+    { X86::OR8rr,           X86::OR8rm,         0 },
+    { X86::ORPDrr,          X86::ORPDrm,        TB_ALIGN_16 },
+    { X86::ORPSrr,          X86::ORPSrm,        TB_ALIGN_16 },
+    { X86::PACKSSDWrr,      X86::PACKSSDWrm,    TB_ALIGN_16 },
+    { X86::PACKSSWBrr,      X86::PACKSSWBrm,    TB_ALIGN_16 },
+    { X86::PACKUSDWrr,      X86::PACKUSDWrm,    TB_ALIGN_16 },
+    { X86::PACKUSWBrr,      X86::PACKUSWBrm,    TB_ALIGN_16 },
+    { X86::PADDBrr,         X86::PADDBrm,       TB_ALIGN_16 },
+    { X86::PADDDrr,         X86::PADDDrm,       TB_ALIGN_16 },
+    { X86::PADDQrr,         X86::PADDQrm,       TB_ALIGN_16 },
+    { X86::PADDSBrr,        X86::PADDSBrm,      TB_ALIGN_16 },
+    { X86::PADDSWrr,        X86::PADDSWrm,      TB_ALIGN_16 },
+    { X86::PADDUSBrr,       X86::PADDUSBrm,     TB_ALIGN_16 },
+    { X86::PADDUSWrr,       X86::PADDUSWrm,     TB_ALIGN_16 },
+    { X86::PADDWrr,         X86::PADDWrm,       TB_ALIGN_16 },
+    { X86::PALIGNRrri,      X86::PALIGNRrmi,    TB_ALIGN_16 },
+    { X86::PANDNrr,         X86::PANDNrm,       TB_ALIGN_16 },
+    { X86::PANDrr,          X86::PANDrm,        TB_ALIGN_16 },
+    { X86::PAVGBrr,         X86::PAVGBrm,       TB_ALIGN_16 },
+    { X86::PAVGWrr,         X86::PAVGWrm,       TB_ALIGN_16 },
+    { X86::PBLENDVBrr0,     X86::PBLENDVBrm0,   TB_ALIGN_16 },
+    { X86::PBLENDWrri,      X86::PBLENDWrmi,    TB_ALIGN_16 },
+    { X86::PCLMULQDQrr,     X86::PCLMULQDQrm,   TB_ALIGN_16 },
+    { X86::PCMPEQBrr,       X86::PCMPEQBrm,     TB_ALIGN_16 },
+    { X86::PCMPEQDrr,       X86::PCMPEQDrm,     TB_ALIGN_16 },
+    { X86::PCMPEQQrr,       X86::PCMPEQQrm,     TB_ALIGN_16 },
+    { X86::PCMPEQWrr,       X86::PCMPEQWrm,     TB_ALIGN_16 },
+    { X86::PCMPGTBrr,       X86::PCMPGTBrm,     TB_ALIGN_16 },
+    { X86::PCMPGTDrr,       X86::PCMPGTDrm,     TB_ALIGN_16 },
+    { X86::PCMPGTQrr,       X86::PCMPGTQrm,     TB_ALIGN_16 },
+    { X86::PCMPGTWrr,       X86::PCMPGTWrm,     TB_ALIGN_16 },
+    { X86::PHADDDrr,        X86::PHADDDrm,      TB_ALIGN_16 },
+    { X86::PHADDWrr,        X86::PHADDWrm,      TB_ALIGN_16 },
+    { X86::PHADDSWrr128,    X86::PHADDSWrm128,  TB_ALIGN_16 },
+    { X86::PHSUBDrr,        X86::PHSUBDrm,      TB_ALIGN_16 },
+    { X86::PHSUBSWrr128,    X86::PHSUBSWrm128,  TB_ALIGN_16 },
+    { X86::PHSUBWrr,        X86::PHSUBWrm,      TB_ALIGN_16 },
+    { X86::PINSRBrr,        X86::PINSRBrm,      0 },
+    { X86::PINSRDrr,        X86::PINSRDrm,      0 },
+    { X86::PINSRQrr,        X86::PINSRQrm,      0 },
+    { X86::PINSRWrri,       X86::PINSRWrmi,     0 },
+    { X86::PMADDUBSWrr,     X86::PMADDUBSWrm,   TB_ALIGN_16 },
+    { X86::PMADDWDrr,       X86::PMADDWDrm,     TB_ALIGN_16 },
+    { X86::PMAXSBrr,        X86::PMAXSBrm,      TB_ALIGN_16 },
+    { X86::PMAXSDrr,        X86::PMAXSDrm,      TB_ALIGN_16 },
+    { X86::PMAXSWrr,        X86::PMAXSWrm,      TB_ALIGN_16 },
+    { X86::PMAXUBrr,        X86::PMAXUBrm,      TB_ALIGN_16 },
+    { X86::PMAXUDrr,        X86::PMAXUDrm,      TB_ALIGN_16 },
+    { X86::PMAXUWrr,        X86::PMAXUWrm,      TB_ALIGN_16 },
+    { X86::PMINSBrr,        X86::PMINSBrm,      TB_ALIGN_16 },
+    { X86::PMINSDrr,        X86::PMINSDrm,      TB_ALIGN_16 },
+    { X86::PMINSWrr,        X86::PMINSWrm,      TB_ALIGN_16 },
+    { X86::PMINUBrr,        X86::PMINUBrm,      TB_ALIGN_16 },
+    { X86::PMINUDrr,        X86::PMINUDrm,      TB_ALIGN_16 },
+    { X86::PMINUWrr,        X86::PMINUWrm,      TB_ALIGN_16 },
+    { X86::PMULDQrr,        X86::PMULDQrm,      TB_ALIGN_16 },
+    { X86::PMULHRSWrr,      X86::PMULHRSWrm,    TB_ALIGN_16 },
+    { X86::PMULHUWrr,       X86::PMULHUWrm,     TB_ALIGN_16 },
+    { X86::PMULHWrr,        X86::PMULHWrm,      TB_ALIGN_16 },
+    { X86::PMULLDrr,        X86::PMULLDrm,      TB_ALIGN_16 },
+    { X86::PMULLWrr,        X86::PMULLWrm,      TB_ALIGN_16 },
+    { X86::PMULUDQrr,       X86::PMULUDQrm,     TB_ALIGN_16 },
+    { X86::PORrr,           X86::PORrm,         TB_ALIGN_16 },
+    { X86::PSADBWrr,        X86::PSADBWrm,      TB_ALIGN_16 },
+    { X86::PSHUFBrr,        X86::PSHUFBrm,      TB_ALIGN_16 },
+    { X86::PSIGNBrr128,     X86::PSIGNBrm128,   TB_ALIGN_16 },
+    { X86::PSIGNWrr128,     X86::PSIGNWrm128,   TB_ALIGN_16 },
+    { X86::PSIGNDrr128,     X86::PSIGNDrm128,   TB_ALIGN_16 },
+    { X86::PSLLDrr,         X86::PSLLDrm,       TB_ALIGN_16 },
+    { X86::PSLLQrr,         X86::PSLLQrm,       TB_ALIGN_16 },
+    { X86::PSLLWrr,         X86::PSLLWrm,       TB_ALIGN_16 },
+    { X86::PSRADrr,         X86::PSRADrm,       TB_ALIGN_16 },
+    { X86::PSRAWrr,         X86::PSRAWrm,       TB_ALIGN_16 },
+    { X86::PSRLDrr,         X86::PSRLDrm,       TB_ALIGN_16 },
+    { X86::PSRLQrr,         X86::PSRLQrm,       TB_ALIGN_16 },
+    { X86::PSRLWrr,         X86::PSRLWrm,       TB_ALIGN_16 },
+    { X86::PSUBBrr,         X86::PSUBBrm,       TB_ALIGN_16 },
+    { X86::PSUBDrr,         X86::PSUBDrm,       TB_ALIGN_16 },
+    { X86::PSUBQrr,         X86::PSUBQrm,       TB_ALIGN_16 },
+    { X86::PSUBSBrr,        X86::PSUBSBrm,      TB_ALIGN_16 },
+    { X86::PSUBSWrr,        X86::PSUBSWrm,      TB_ALIGN_16 },
+    { X86::PSUBUSBrr,       X86::PSUBUSBrm,     TB_ALIGN_16 },
+    { X86::PSUBUSWrr,       X86::PSUBUSWrm,     TB_ALIGN_16 },
+    { X86::PSUBWrr,         X86::PSUBWrm,       TB_ALIGN_16 },
+    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm,   TB_ALIGN_16 },
+    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm,   TB_ALIGN_16 },
+    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm,  TB_ALIGN_16 },
+    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm,   TB_ALIGN_16 },
+    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm,   TB_ALIGN_16 },
+    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm,   TB_ALIGN_16 },
+    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm,  TB_ALIGN_16 },
+    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm,   TB_ALIGN_16 },
+    { X86::PXORrr,          X86::PXORrm,        TB_ALIGN_16 },
+    { X86::ROUNDSDr_Int,    X86::ROUNDSDm_Int,  TB_NO_REVERSE },
+    { X86::ROUNDSSr_Int,    X86::ROUNDSSm_Int,  TB_NO_REVERSE },
+    { X86::SBB32rr,         X86::SBB32rm,       0 },
+    { X86::SBB64rr,         X86::SBB64rm,       0 },
+    { X86::SHUFPDrri,       X86::SHUFPDrmi,     TB_ALIGN_16 },
+    { X86::SHUFPSrri,       X86::SHUFPSrmi,     TB_ALIGN_16 },
+    { X86::SUB16rr,         X86::SUB16rm,       0 },
+    { X86::SUB32rr,         X86::SUB32rm,       0 },
+    { X86::SUB64rr,         X86::SUB64rm,       0 },
+    { X86::SUB8rr,          X86::SUB8rm,        0 },
+    { X86::SUBPDrr,         X86::SUBPDrm,       TB_ALIGN_16 },
+    { X86::SUBPSrr,         X86::SUBPSrm,       TB_ALIGN_16 },
+    { X86::SUBSDrr,         X86::SUBSDrm,       0 },
+    { X86::SUBSDrr_Int,     X86::SUBSDrm_Int,   TB_NO_REVERSE },
+    { X86::SUBSSrr,         X86::SUBSSrm,       0 },
+    { X86::SUBSSrr_Int,     X86::SUBSSrm_Int,   TB_NO_REVERSE },
+    // FIXME: TEST*rr -> swapped operand of TEST*mr.
+    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm,    TB_ALIGN_16 },
+    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm,    TB_ALIGN_16 },
+    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm,    TB_ALIGN_16 },
+    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm,    TB_ALIGN_16 },
+    { X86::XOR16rr,         X86::XOR16rm,       0 },
+    { X86::XOR32rr,         X86::XOR32rm,       0 },
+    { X86::XOR64rr,         X86::XOR64rm,       0 },
+    { X86::XOR8rr,          X86::XOR8rm,        0 },
+    { X86::XORPDrr,         X86::XORPDrm,       TB_ALIGN_16 },
+    { X86::XORPSrr,         X86::XORPSrm,       TB_ALIGN_16 },
+
+    // MMX version of foldable instructions
+    { X86::MMX_CVTPI2PSirr,   X86::MMX_CVTPI2PSirm,   0 },
+    { X86::MMX_PACKSSDWirr,   X86::MMX_PACKSSDWirm,   0 },
+    { X86::MMX_PACKSSWBirr,   X86::MMX_PACKSSWBirm,   0 },
+    { X86::MMX_PACKUSWBirr,   X86::MMX_PACKUSWBirm,   0 },
+    { X86::MMX_PADDBirr,      X86::MMX_PADDBirm,      0 },
+    { X86::MMX_PADDDirr,      X86::MMX_PADDDirm,      0 },
+    { X86::MMX_PADDQirr,      X86::MMX_PADDQirm,      0 },
+    { X86::MMX_PADDSBirr,     X86::MMX_PADDSBirm,     0 },
+    { X86::MMX_PADDSWirr,     X86::MMX_PADDSWirm,     0 },
+    { X86::MMX_PADDUSBirr,    X86::MMX_PADDUSBirm,    0 },
+    { X86::MMX_PADDUSWirr,    X86::MMX_PADDUSWirm,    0 },
+    { X86::MMX_PADDWirr,      X86::MMX_PADDWirm,      0 },
+    { X86::MMX_PALIGNR64irr,  X86::MMX_PALIGNR64irm,  0 },
+    { X86::MMX_PANDNirr,      X86::MMX_PANDNirm,      0 },
+    { X86::MMX_PANDirr,       X86::MMX_PANDirm,       0 },
+    { X86::MMX_PAVGBirr,      X86::MMX_PAVGBirm,      0 },
+    { X86::MMX_PAVGWirr,      X86::MMX_PAVGWirm,      0 },
+    { X86::MMX_PCMPEQBirr,    X86::MMX_PCMPEQBirm,    0 },
+    { X86::MMX_PCMPEQDirr,    X86::MMX_PCMPEQDirm,    0 },
+    { X86::MMX_PCMPEQWirr,    X86::MMX_PCMPEQWirm,    0 },
+    { X86::MMX_PCMPGTBirr,    X86::MMX_PCMPGTBirm,    0 },
+    { X86::MMX_PCMPGTDirr,    X86::MMX_PCMPGTDirm,    0 },
+    { X86::MMX_PCMPGTWirr,    X86::MMX_PCMPGTWirm,    0 },
+    { X86::MMX_PHADDSWrr64,   X86::MMX_PHADDSWrm64,   0 },
+    { X86::MMX_PHADDWrr64,    X86::MMX_PHADDWrm64,    0 },
+    { X86::MMX_PHADDrr64,     X86::MMX_PHADDrm64,     0 },
+    { X86::MMX_PHSUBDrr64,    X86::MMX_PHSUBDrm64,    0 },
+    { X86::MMX_PHSUBSWrr64,   X86::MMX_PHSUBSWrm64,   0 },
+    { X86::MMX_PHSUBWrr64,    X86::MMX_PHSUBWrm64,    0 },
+    { X86::MMX_PINSRWirri,    X86::MMX_PINSRWirmi,    0 },
+    { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 },
+    { X86::MMX_PMADDWDirr,    X86::MMX_PMADDWDirm,    0 },
+    { X86::MMX_PMAXSWirr,     X86::MMX_PMAXSWirm,     0 },
+    { X86::MMX_PMAXUBirr,     X86::MMX_PMAXUBirm,     0 },
+    { X86::MMX_PMINSWirr,     X86::MMX_PMINSWirm,     0 },
+    { X86::MMX_PMINUBirr,     X86::MMX_PMINUBirm,     0 },
+    { X86::MMX_PMULHRSWrr64,  X86::MMX_PMULHRSWrm64,  0 },
+    { X86::MMX_PMULHUWirr,    X86::MMX_PMULHUWirm,    0 },
+    { X86::MMX_PMULHWirr,     X86::MMX_PMULHWirm,     0 },
+    { X86::MMX_PMULLWirr,     X86::MMX_PMULLWirm,     0 },
+    { X86::MMX_PMULUDQirr,    X86::MMX_PMULUDQirm,    0 },
+    { X86::MMX_PORirr,        X86::MMX_PORirm,        0 },
+    { X86::MMX_PSADBWirr,     X86::MMX_PSADBWirm,     0 },
+    { X86::MMX_PSHUFBrr64,    X86::MMX_PSHUFBrm64,    0 },
+    { X86::MMX_PSIGNBrr64,    X86::MMX_PSIGNBrm64,    0 },
+    { X86::MMX_PSIGNDrr64,    X86::MMX_PSIGNDrm64,    0 },
+    { X86::MMX_PSIGNWrr64,    X86::MMX_PSIGNWrm64,    0 },
+    { X86::MMX_PSLLDrr,       X86::MMX_PSLLDrm,       0 },
+    { X86::MMX_PSLLQrr,       X86::MMX_PSLLQrm,       0 },
+    { X86::MMX_PSLLWrr,       X86::MMX_PSLLWrm,       0 },
+    { X86::MMX_PSRADrr,       X86::MMX_PSRADrm,       0 },
+    { X86::MMX_PSRAWrr,       X86::MMX_PSRAWrm,       0 },
+    { X86::MMX_PSRLDrr,       X86::MMX_PSRLDrm,       0 },
+    { X86::MMX_PSRLQrr,       X86::MMX_PSRLQrm,       0 },
+    { X86::MMX_PSRLWrr,       X86::MMX_PSRLWrm,       0 },
+    { X86::MMX_PSUBBirr,      X86::MMX_PSUBBirm,      0 },
+    { X86::MMX_PSUBDirr,      X86::MMX_PSUBDirm,      0 },
+    { X86::MMX_PSUBQirr,      X86::MMX_PSUBQirm,      0 },
+    { X86::MMX_PSUBSBirr,     X86::MMX_PSUBSBirm,     0 },
+    { X86::MMX_PSUBSWirr,     X86::MMX_PSUBSWirm,     0 },
+    { X86::MMX_PSUBUSBirr,    X86::MMX_PSUBUSBirm,    0 },
+    { X86::MMX_PSUBUSWirr,    X86::MMX_PSUBUSWirm,    0 },
+    { X86::MMX_PSUBWirr,      X86::MMX_PSUBWirm,      0 },
+    { X86::MMX_PUNPCKHBWirr,  X86::MMX_PUNPCKHBWirm,  0 },
+    { X86::MMX_PUNPCKHDQirr,  X86::MMX_PUNPCKHDQirm,  0 },
+    { X86::MMX_PUNPCKHWDirr,  X86::MMX_PUNPCKHWDirm,  0 },
+    { X86::MMX_PUNPCKLBWirr,  X86::MMX_PUNPCKLBWirm,  0 },
+    { X86::MMX_PUNPCKLDQirr,  X86::MMX_PUNPCKLDQirm,  0 },
+    { X86::MMX_PUNPCKLWDirr,  X86::MMX_PUNPCKLWDirm,  0 },
+    { X86::MMX_PXORirr,       X86::MMX_PXORirm,       0 },
+
+    // 3DNow! version of foldable instructions
+    { X86::PAVGUSBrr,         X86::PAVGUSBrm,         0 },
+    { X86::PFACCrr,           X86::PFACCrm,           0 },
+    { X86::PFADDrr,           X86::PFADDrm,           0 },
+    { X86::PFCMPEQrr,         X86::PFCMPEQrm,         0 },
+    { X86::PFCMPGErr,         X86::PFCMPGErm,         0 },
+    { X86::PFCMPGTrr,         X86::PFCMPGTrm,         0 },
+    { X86::PFMAXrr,           X86::PFMAXrm,           0 },
+    { X86::PFMINrr,           X86::PFMINrm,           0 },
+    { X86::PFMULrr,           X86::PFMULrm,           0 },
+    { X86::PFNACCrr,          X86::PFNACCrm,          0 },
+    { X86::PFPNACCrr,         X86::PFPNACCrm,         0 },
+    { X86::PFRCPIT1rr,        X86::PFRCPIT1rm,        0 },
+    { X86::PFRCPIT2rr,        X86::PFRCPIT2rm,        0 },
+    { X86::PFRSQIT1rr,        X86::PFRSQIT1rm,        0 },
+    { X86::PFSUBrr,           X86::PFSUBrm,           0 },
+    { X86::PFSUBRrr,          X86::PFSUBRrm,          0 },
+    { X86::PMULHRWrr,         X86::PMULHRWrm,         0 },
+
+    // AVX 128-bit versions of foldable instructions
+    { X86::VCVTSI2SD64rr,     X86::VCVTSI2SD64rm,      0 },
+    { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm,  0 },
+    { X86::VCVTSI2SDrr,       X86::VCVTSI2SDrm,        0 },
+    { X86::Int_VCVTSI2SDrr,   X86::Int_VCVTSI2SDrm,    0 },
+    { X86::VCVTSI2SS64rr,     X86::VCVTSI2SS64rm,      0 },
+    { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm,  0 },
+    { X86::VCVTSI2SSrr,       X86::VCVTSI2SSrm,        0 },
+    { X86::Int_VCVTSI2SSrr,   X86::Int_VCVTSI2SSrm,    0 },
+    { X86::VADDPDrr,          X86::VADDPDrm,           0 },
+    { X86::VADDPSrr,          X86::VADDPSrm,           0 },
+    { X86::VADDSDrr,          X86::VADDSDrm,           0 },
+    { X86::VADDSDrr_Int,      X86::VADDSDrm_Int,       TB_NO_REVERSE },
+    { X86::VADDSSrr,          X86::VADDSSrm,           0 },
+    { X86::VADDSSrr_Int,      X86::VADDSSrm_Int,       TB_NO_REVERSE },
+    { X86::VADDSUBPDrr,       X86::VADDSUBPDrm,        0 },
+    { X86::VADDSUBPSrr,       X86::VADDSUBPSrm,        0 },
+    { X86::VANDNPDrr,         X86::VANDNPDrm,          0 },
+    { X86::VANDNPSrr,         X86::VANDNPSrm,          0 },
+    { X86::VANDPDrr,          X86::VANDPDrm,           0 },
+    { X86::VANDPSrr,          X86::VANDPSrm,           0 },
+    { X86::VBLENDPDrri,       X86::VBLENDPDrmi,        0 },
+    { X86::VBLENDPSrri,       X86::VBLENDPSrmi,        0 },
+    { X86::VBLENDVPDrr,       X86::VBLENDVPDrm,        0 },
+    { X86::VBLENDVPSrr,       X86::VBLENDVPSrm,        0 },
+    { X86::VCMPPDrri,         X86::VCMPPDrmi,          0 },
+    { X86::VCMPPSrri,         X86::VCMPPSrmi,          0 },
+    { X86::VCMPSDrr,          X86::VCMPSDrm,           0 },
+    { X86::VCMPSSrr,          X86::VCMPSSrm,           0 },
+    { X86::VDIVPDrr,          X86::VDIVPDrm,           0 },
+    { X86::VDIVPSrr,          X86::VDIVPSrm,           0 },
+    { X86::VDIVSDrr,          X86::VDIVSDrm,           0 },
+    { X86::VDIVSDrr_Int,      X86::VDIVSDrm_Int,       TB_NO_REVERSE },
+    { X86::VDIVSSrr,          X86::VDIVSSrm,           0 },
+    { X86::VDIVSSrr_Int,      X86::VDIVSSrm_Int,       TB_NO_REVERSE },
+    { X86::VDPPDrri,          X86::VDPPDrmi,           0 },
+    { X86::VDPPSrri,          X86::VDPPSrmi,           0 },
+    { X86::VHADDPDrr,         X86::VHADDPDrm,          0 },
+    { X86::VHADDPSrr,         X86::VHADDPSrm,          0 },
+    { X86::VHSUBPDrr,         X86::VHSUBPDrm,          0 },
+    { X86::VHSUBPSrr,         X86::VHSUBPSrm,          0 },
+    { X86::Int_VCMPSDrr,      X86::Int_VCMPSDrm,       TB_NO_REVERSE },
+    { X86::Int_VCMPSSrr,      X86::Int_VCMPSSrm,       TB_NO_REVERSE },
+    { X86::VMAXCPDrr,         X86::VMAXCPDrm,          0 },
+    { X86::VMAXCPSrr,         X86::VMAXCPSrm,          0 },
+    { X86::VMAXCSDrr,         X86::VMAXCSDrm,          0 },
+    { X86::VMAXCSSrr,         X86::VMAXCSSrm,          0 },
+    { X86::VMAXPDrr,          X86::VMAXPDrm,           0 },
+    { X86::VMAXPSrr,          X86::VMAXPSrm,           0 },
+    { X86::VMAXSDrr,          X86::VMAXSDrm,           0 },
+    { X86::VMAXSDrr_Int,      X86::VMAXSDrm_Int,       TB_NO_REVERSE },
+    { X86::VMAXSSrr,          X86::VMAXSSrm,           0 },
+    { X86::VMAXSSrr_Int,      X86::VMAXSSrm_Int,       TB_NO_REVERSE },
+    { X86::VMINCPDrr,         X86::VMINCPDrm,          0 },
+    { X86::VMINCPSrr,         X86::VMINCPSrm,          0 },
+    { X86::VMINCSDrr,         X86::VMINCSDrm,          0 },
+    { X86::VMINCSSrr,         X86::VMINCSSrm,          0 },
+    { X86::VMINPDrr,          X86::VMINPDrm,           0 },
+    { X86::VMINPSrr,          X86::VMINPSrm,           0 },
+    { X86::VMINSDrr,          X86::VMINSDrm,           0 },
+    { X86::VMINSDrr_Int,      X86::VMINSDrm_Int,       TB_NO_REVERSE },
+    { X86::VMINSSrr,          X86::VMINSSrm,           0 },
+    { X86::VMINSSrr_Int,      X86::VMINSSrm_Int,       TB_NO_REVERSE },
+    { X86::VMOVLHPSrr,        X86::VMOVHPSrm,          TB_NO_REVERSE },
+    { X86::VMPSADBWrri,       X86::VMPSADBWrmi,        0 },
+    { X86::VMULPDrr,          X86::VMULPDrm,           0 },
+    { X86::VMULPSrr,          X86::VMULPSrm,           0 },
+    { X86::VMULSDrr,          X86::VMULSDrm,           0 },
+    { X86::VMULSDrr_Int,      X86::VMULSDrm_Int,       TB_NO_REVERSE },
+    { X86::VMULSSrr,          X86::VMULSSrm,           0 },
+    { X86::VMULSSrr_Int,      X86::VMULSSrm_Int,       TB_NO_REVERSE },
+    { X86::VORPDrr,           X86::VORPDrm,            0 },
+    { X86::VORPSrr,           X86::VORPSrm,            0 },
+    { X86::VPACKSSDWrr,       X86::VPACKSSDWrm,        0 },
+    { X86::VPACKSSWBrr,       X86::VPACKSSWBrm,        0 },
+    { X86::VPACKUSDWrr,       X86::VPACKUSDWrm,        0 },
+    { X86::VPACKUSWBrr,       X86::VPACKUSWBrm,        0 },
+    { X86::VPADDBrr,          X86::VPADDBrm,           0 },
+    { X86::VPADDDrr,          X86::VPADDDrm,           0 },
+    { X86::VPADDQrr,          X86::VPADDQrm,           0 },
+    { X86::VPADDSBrr,         X86::VPADDSBrm,          0 },
+    { X86::VPADDSWrr,         X86::VPADDSWrm,          0 },
+    { X86::VPADDUSBrr,        X86::VPADDUSBrm,         0 },
+    { X86::VPADDUSWrr,        X86::VPADDUSWrm,         0 },
+    { X86::VPADDWrr,          X86::VPADDWrm,           0 },
+    { X86::VPALIGNRrri,       X86::VPALIGNRrmi,        0 },
+    { X86::VPANDNrr,          X86::VPANDNrm,           0 },
+    { X86::VPANDrr,           X86::VPANDrm,            0 },
+    { X86::VPAVGBrr,          X86::VPAVGBrm,           0 },
+    { X86::VPAVGWrr,          X86::VPAVGWrm,           0 },
+    { X86::VPBLENDVBrr,       X86::VPBLENDVBrm,        0 },
+    { X86::VPBLENDWrri,       X86::VPBLENDWrmi,        0 },
+    { X86::VPCLMULQDQrr,      X86::VPCLMULQDQrm,       0 },
+    { X86::VPCMPEQBrr,        X86::VPCMPEQBrm,         0 },
+    { X86::VPCMPEQDrr,        X86::VPCMPEQDrm,         0 },
+    { X86::VPCMPEQQrr,        X86::VPCMPEQQrm,         0 },
+    { X86::VPCMPEQWrr,        X86::VPCMPEQWrm,         0 },
+    { X86::VPCMPGTBrr,        X86::VPCMPGTBrm,         0 },
+    { X86::VPCMPGTDrr,        X86::VPCMPGTDrm,         0 },
+    { X86::VPCMPGTQrr,        X86::VPCMPGTQrm,         0 },
+    { X86::VPCMPGTWrr,        X86::VPCMPGTWrm,         0 },
+    { X86::VPHADDDrr,         X86::VPHADDDrm,          0 },
+    { X86::VPHADDSWrr128,     X86::VPHADDSWrm128,      0 },
+    { X86::VPHADDWrr,         X86::VPHADDWrm,          0 },
+    { X86::VPHSUBDrr,         X86::VPHSUBDrm,          0 },
+    { X86::VPHSUBSWrr128,     X86::VPHSUBSWrm128,      0 },
+    { X86::VPHSUBWrr,         X86::VPHSUBWrm,          0 },
+    { X86::VPERMILPDrr,       X86::VPERMILPDrm,        0 },
+    { X86::VPERMILPSrr,       X86::VPERMILPSrm,        0 },
+    { X86::VPINSRBrr,         X86::VPINSRBrm,          0 },
+    { X86::VPINSRDrr,         X86::VPINSRDrm,          0 },
+    { X86::VPINSRQrr,         X86::VPINSRQrm,          0 },
+    { X86::VPINSRWrri,        X86::VPINSRWrmi,         0 },
+    { X86::VPMADDUBSWrr,      X86::VPMADDUBSWrm,       0 },
+    { X86::VPMADDWDrr,        X86::VPMADDWDrm,         0 },
+    { X86::VPMAXSBrr,         X86::VPMAXSBrm,          0 },
+    { X86::VPMAXSDrr,         X86::VPMAXSDrm,          0 },
+    { X86::VPMAXSWrr,         X86::VPMAXSWrm,          0 },
+    { X86::VPMAXUBrr,         X86::VPMAXUBrm,          0 },
+    { X86::VPMAXUDrr,         X86::VPMAXUDrm,          0 },
+    { X86::VPMAXUWrr,         X86::VPMAXUWrm,          0 },
+    { X86::VPMINSBrr,         X86::VPMINSBrm,          0 },
+    { X86::VPMINSDrr,         X86::VPMINSDrm,          0 },
+    { X86::VPMINSWrr,         X86::VPMINSWrm,          0 },
+    { X86::VPMINUBrr,         X86::VPMINUBrm,          0 },
+    { X86::VPMINUDrr,         X86::VPMINUDrm,          0 },
+    { X86::VPMINUWrr,         X86::VPMINUWrm,          0 },
+    { X86::VPMULDQrr,         X86::VPMULDQrm,          0 },
+    { X86::VPMULHRSWrr,       X86::VPMULHRSWrm,        0 },
+    { X86::VPMULHUWrr,        X86::VPMULHUWrm,         0 },
+    { X86::VPMULHWrr,         X86::VPMULHWrm,          0 },
+    { X86::VPMULLDrr,         X86::VPMULLDrm,          0 },
+    { X86::VPMULLWrr,         X86::VPMULLWrm,          0 },
+    { X86::VPMULUDQrr,        X86::VPMULUDQrm,         0 },
+    { X86::VPORrr,            X86::VPORrm,             0 },
+    { X86::VPSADBWrr,         X86::VPSADBWrm,          0 },
+    { X86::VPSHUFBrr,         X86::VPSHUFBrm,          0 },
+    { X86::VPSIGNBrr128,      X86::VPSIGNBrm128,       0 },
+    { X86::VPSIGNWrr128,      X86::VPSIGNWrm128,       0 },
+    { X86::VPSIGNDrr128,      X86::VPSIGNDrm128,       0 },
+    { X86::VPSLLDrr,          X86::VPSLLDrm,           0 },
+    { X86::VPSLLQrr,          X86::VPSLLQrm,           0 },
+    { X86::VPSLLWrr,          X86::VPSLLWrm,           0 },
+    { X86::VPSRADrr,          X86::VPSRADrm,           0 },
+    { X86::VPSRAWrr,          X86::VPSRAWrm,           0 },
+    { X86::VPSRLDrr,          X86::VPSRLDrm,           0 },
+    { X86::VPSRLQrr,          X86::VPSRLQrm,           0 },
+    { X86::VPSRLWrr,          X86::VPSRLWrm,           0 },
+    { X86::VPSUBBrr,          X86::VPSUBBrm,           0 },
+    { X86::VPSUBDrr,          X86::VPSUBDrm,           0 },
+    { X86::VPSUBQrr,          X86::VPSUBQrm,           0 },
+    { X86::VPSUBSBrr,         X86::VPSUBSBrm,          0 },
+    { X86::VPSUBSWrr,         X86::VPSUBSWrm,          0 },
+    { X86::VPSUBUSBrr,        X86::VPSUBUSBrm,         0 },
+    { X86::VPSUBUSWrr,        X86::VPSUBUSWrm,         0 },
+    { X86::VPSUBWrr,          X86::VPSUBWrm,           0 },
+    { X86::VPUNPCKHBWrr,      X86::VPUNPCKHBWrm,       0 },
+    { X86::VPUNPCKHDQrr,      X86::VPUNPCKHDQrm,       0 },
+    { X86::VPUNPCKHQDQrr,     X86::VPUNPCKHQDQrm,      0 },
+    { X86::VPUNPCKHWDrr,      X86::VPUNPCKHWDrm,       0 },
+    { X86::VPUNPCKLBWrr,      X86::VPUNPCKLBWrm,       0 },
+    { X86::VPUNPCKLDQrr,      X86::VPUNPCKLDQrm,       0 },
+    { X86::VPUNPCKLQDQrr,     X86::VPUNPCKLQDQrm,      0 },
+    { X86::VPUNPCKLWDrr,      X86::VPUNPCKLWDrm,       0 },
+    { X86::VPXORrr,           X86::VPXORrm,            0 },
+    { X86::VRCPSSr,           X86::VRCPSSm,            0 },
+    { X86::VRCPSSr_Int,       X86::VRCPSSm_Int,        TB_NO_REVERSE },
+    { X86::VRSQRTSSr,         X86::VRSQRTSSm,          0 },
+    { X86::VRSQRTSSr_Int,     X86::VRSQRTSSm_Int,      TB_NO_REVERSE },
+    { X86::VROUNDSDr,         X86::VROUNDSDm,          0 },
+    { X86::VROUNDSDr_Int,     X86::VROUNDSDm_Int,      TB_NO_REVERSE },
+    { X86::VROUNDSSr,         X86::VROUNDSSm,          0 },
+    { X86::VROUNDSSr_Int,     X86::VROUNDSSm_Int,      TB_NO_REVERSE },
+    { X86::VSHUFPDrri,        X86::VSHUFPDrmi,         0 },
+    { X86::VSHUFPSrri,        X86::VSHUFPSrmi,         0 },
+    { X86::VSQRTSDr,          X86::VSQRTSDm,           0 },
+    { X86::VSQRTSDr_Int,      X86::VSQRTSDm_Int,       TB_NO_REVERSE },
+    { X86::VSQRTSSr,          X86::VSQRTSSm,           0 },
+    { X86::VSQRTSSr_Int,      X86::VSQRTSSm_Int,       TB_NO_REVERSE },
+    { X86::VSUBPDrr,          X86::VSUBPDrm,           0 },
+    { X86::VSUBPSrr,          X86::VSUBPSrm,           0 },
+    { X86::VSUBSDrr,          X86::VSUBSDrm,           0 },
+    { X86::VSUBSDrr_Int,      X86::VSUBSDrm_Int,       TB_NO_REVERSE },
+    { X86::VSUBSSrr,          X86::VSUBSSrm,           0 },
+    { X86::VSUBSSrr_Int,      X86::VSUBSSrm_Int,       TB_NO_REVERSE },
+    { X86::VUNPCKHPDrr,       X86::VUNPCKHPDrm,        0 },
+    { X86::VUNPCKHPSrr,       X86::VUNPCKHPSrm,        0 },
+    { X86::VUNPCKLPDrr,       X86::VUNPCKLPDrm,        0 },
+    { X86::VUNPCKLPSrr,       X86::VUNPCKLPSrm,        0 },
+    { X86::VXORPDrr,          X86::VXORPDrm,           0 },
+    { X86::VXORPSrr,          X86::VXORPSrm,           0 },
+
+    // AVX 256-bit foldable instructions
+    { X86::VADDPDYrr,         X86::VADDPDYrm,          0 },
+    { X86::VADDPSYrr,         X86::VADDPSYrm,          0 },
+    { X86::VADDSUBPDYrr,      X86::VADDSUBPDYrm,       0 },
+    { X86::VADDSUBPSYrr,      X86::VADDSUBPSYrm,       0 },
+    { X86::VANDNPDYrr,        X86::VANDNPDYrm,         0 },
+    { X86::VANDNPSYrr,        X86::VANDNPSYrm,         0 },
+    { X86::VANDPDYrr,         X86::VANDPDYrm,          0 },
+    { X86::VANDPSYrr,         X86::VANDPSYrm,          0 },
+    { X86::VBLENDPDYrri,      X86::VBLENDPDYrmi,       0 },
+    { X86::VBLENDPSYrri,      X86::VBLENDPSYrmi,       0 },
+    { X86::VBLENDVPDYrr,      X86::VBLENDVPDYrm,       0 },
+    { X86::VBLENDVPSYrr,      X86::VBLENDVPSYrm,       0 },
+    { X86::VCMPPDYrri,        X86::VCMPPDYrmi,         0 },
+    { X86::VCMPPSYrri,        X86::VCMPPSYrmi,         0 },
+    { X86::VDIVPDYrr,         X86::VDIVPDYrm,          0 },
+    { X86::VDIVPSYrr,         X86::VDIVPSYrm,          0 },
+    { X86::VDPPSYrri,         X86::VDPPSYrmi,          0 },
+    { X86::VHADDPDYrr,        X86::VHADDPDYrm,         0 },
+    { X86::VHADDPSYrr,        X86::VHADDPSYrm,         0 },
+    { X86::VHSUBPDYrr,        X86::VHSUBPDYrm,         0 },
+    { X86::VHSUBPSYrr,        X86::VHSUBPSYrm,         0 },
+    { X86::VINSERTF128rr,     X86::VINSERTF128rm,      0 },
+    { X86::VMAXCPDYrr,        X86::VMAXCPDYrm,         0 },
+    { X86::VMAXCPSYrr,        X86::VMAXCPSYrm,         0 },
+    { X86::VMAXPDYrr,         X86::VMAXPDYrm,          0 },
+    { X86::VMAXPSYrr,         X86::VMAXPSYrm,          0 },
+    { X86::VMINCPDYrr,        X86::VMINCPDYrm,         0 },
+    { X86::VMINCPSYrr,        X86::VMINCPSYrm,         0 },
+    { X86::VMINPDYrr,         X86::VMINPDYrm,          0 },
+    { X86::VMINPSYrr,         X86::VMINPSYrm,          0 },
+    { X86::VMULPDYrr,         X86::VMULPDYrm,          0 },
+    { X86::VMULPSYrr,         X86::VMULPSYrm,          0 },
+    { X86::VORPDYrr,          X86::VORPDYrm,           0 },
+    { X86::VORPSYrr,          X86::VORPSYrm,           0 },
+    { X86::VPERM2F128rr,      X86::VPERM2F128rm,       0 },
+    { X86::VPERMILPDYrr,      X86::VPERMILPDYrm,       0 },
+    { X86::VPERMILPSYrr,      X86::VPERMILPSYrm,       0 },
+    { X86::VSHUFPDYrri,       X86::VSHUFPDYrmi,        0 },
+    { X86::VSHUFPSYrri,       X86::VSHUFPSYrmi,        0 },
+    { X86::VSUBPDYrr,         X86::VSUBPDYrm,          0 },
+    { X86::VSUBPSYrr,         X86::VSUBPSYrm,          0 },
+    { X86::VUNPCKHPDYrr,      X86::VUNPCKHPDYrm,       0 },
+    { X86::VUNPCKHPSYrr,      X86::VUNPCKHPSYrm,       0 },
+    { X86::VUNPCKLPDYrr,      X86::VUNPCKLPDYrm,       0 },
+    { X86::VUNPCKLPSYrr,      X86::VUNPCKLPSYrm,       0 },
+    { X86::VXORPDYrr,         X86::VXORPDYrm,          0 },
+    { X86::VXORPSYrr,         X86::VXORPSYrm,          0 },
+
+    // AVX2 foldable instructions
+    { X86::VINSERTI128rr,     X86::VINSERTI128rm,      0 },
+    { X86::VPACKSSDWYrr,      X86::VPACKSSDWYrm,       0 },
+    { X86::VPACKSSWBYrr,      X86::VPACKSSWBYrm,       0 },
+    { X86::VPACKUSDWYrr,      X86::VPACKUSDWYrm,       0 },
+    { X86::VPACKUSWBYrr,      X86::VPACKUSWBYrm,       0 },
+    { X86::VPADDBYrr,         X86::VPADDBYrm,          0 },
+    { X86::VPADDDYrr,         X86::VPADDDYrm,          0 },
+    { X86::VPADDQYrr,         X86::VPADDQYrm,          0 },
+    { X86::VPADDSBYrr,        X86::VPADDSBYrm,         0 },
+    { X86::VPADDSWYrr,        X86::VPADDSWYrm,         0 },
+    { X86::VPADDUSBYrr,       X86::VPADDUSBYrm,        0 },
+    { X86::VPADDUSWYrr,       X86::VPADDUSWYrm,        0 },
+    { X86::VPADDWYrr,         X86::VPADDWYrm,          0 },
+    { X86::VPALIGNRYrri,      X86::VPALIGNRYrmi,       0 },
+    { X86::VPANDNYrr,         X86::VPANDNYrm,          0 },
+    { X86::VPANDYrr,          X86::VPANDYrm,           0 },
+    { X86::VPAVGBYrr,         X86::VPAVGBYrm,          0 },
+    { X86::VPAVGWYrr,         X86::VPAVGWYrm,          0 },
+    { X86::VPBLENDDrri,       X86::VPBLENDDrmi,        0 },
+    { X86::VPBLENDDYrri,      X86::VPBLENDDYrmi,       0 },
+    { X86::VPBLENDVBYrr,      X86::VPBLENDVBYrm,       0 },
+    { X86::VPBLENDWYrri,      X86::VPBLENDWYrmi,       0 },
+    { X86::VPCMPEQBYrr,       X86::VPCMPEQBYrm,        0 },
+    { X86::VPCMPEQDYrr,       X86::VPCMPEQDYrm,        0 },
+    { X86::VPCMPEQQYrr,       X86::VPCMPEQQYrm,        0 },
+    { X86::VPCMPEQWYrr,       X86::VPCMPEQWYrm,        0 },
+    { X86::VPCMPGTBYrr,       X86::VPCMPGTBYrm,        0 },
+    { X86::VPCMPGTDYrr,       X86::VPCMPGTDYrm,        0 },
+    { X86::VPCMPGTQYrr,       X86::VPCMPGTQYrm,        0 },
+    { X86::VPCMPGTWYrr,       X86::VPCMPGTWYrm,        0 },
+    { X86::VPERM2I128rr,      X86::VPERM2I128rm,       0 },
+    { X86::VPERMDYrr,         X86::VPERMDYrm,          0 },
+    { X86::VPERMPSYrr,        X86::VPERMPSYrm,         0 },
+    { X86::VPHADDDYrr,        X86::VPHADDDYrm,         0 },
+    { X86::VPHADDSWrr256,     X86::VPHADDSWrm256,      0 },
+    { X86::VPHADDWYrr,        X86::VPHADDWYrm,         0 },
+    { X86::VPHSUBDYrr,        X86::VPHSUBDYrm,         0 },
+    { X86::VPHSUBSWrr256,     X86::VPHSUBSWrm256,      0 },
+    { X86::VPHSUBWYrr,        X86::VPHSUBWYrm,         0 },
+    { X86::VPMADDUBSWYrr,     X86::VPMADDUBSWYrm,      0 },
+    { X86::VPMADDWDYrr,       X86::VPMADDWDYrm,        0 },
+    { X86::VPMAXSBYrr,        X86::VPMAXSBYrm,         0 },
+    { X86::VPMAXSDYrr,        X86::VPMAXSDYrm,         0 },
+    { X86::VPMAXSWYrr,        X86::VPMAXSWYrm,         0 },
+    { X86::VPMAXUBYrr,        X86::VPMAXUBYrm,         0 },
+    { X86::VPMAXUDYrr,        X86::VPMAXUDYrm,         0 },
+    { X86::VPMAXUWYrr,        X86::VPMAXUWYrm,         0 },
+    { X86::VPMINSBYrr,        X86::VPMINSBYrm,         0 },
+    { X86::VPMINSDYrr,        X86::VPMINSDYrm,         0 },
+    { X86::VPMINSWYrr,        X86::VPMINSWYrm,         0 },
+    { X86::VPMINUBYrr,        X86::VPMINUBYrm,         0 },
+    { X86::VPMINUDYrr,        X86::VPMINUDYrm,         0 },
+    { X86::VPMINUWYrr,        X86::VPMINUWYrm,         0 },
+    { X86::VMPSADBWYrri,      X86::VMPSADBWYrmi,       0 },
+    { X86::VPMULDQYrr,        X86::VPMULDQYrm,         0 },
+    { X86::VPMULHRSWYrr,      X86::VPMULHRSWYrm,       0 },
+    { X86::VPMULHUWYrr,       X86::VPMULHUWYrm,        0 },
+    { X86::VPMULHWYrr,        X86::VPMULHWYrm,         0 },
+    { X86::VPMULLDYrr,        X86::VPMULLDYrm,         0 },
+    { X86::VPMULLWYrr,        X86::VPMULLWYrm,         0 },
+    { X86::VPMULUDQYrr,       X86::VPMULUDQYrm,        0 },
+    { X86::VPORYrr,           X86::VPORYrm,            0 },
+    { X86::VPSADBWYrr,        X86::VPSADBWYrm,         0 },
+    { X86::VPSHUFBYrr,        X86::VPSHUFBYrm,         0 },
+    { X86::VPSIGNBYrr256,     X86::VPSIGNBYrm256,      0 },
+    { X86::VPSIGNWYrr256,     X86::VPSIGNWYrm256,      0 },
+    { X86::VPSIGNDYrr256,     X86::VPSIGNDYrm256,      0 },
+    { X86::VPSLLDYrr,         X86::VPSLLDYrm,          0 },
+    { X86::VPSLLQYrr,         X86::VPSLLQYrm,          0 },
+    { X86::VPSLLWYrr,         X86::VPSLLWYrm,          0 },
+    { X86::VPSLLVDrr,         X86::VPSLLVDrm,          0 },
+    { X86::VPSLLVDYrr,        X86::VPSLLVDYrm,         0 },
+    { X86::VPSLLVQrr,         X86::VPSLLVQrm,          0 },
+    { X86::VPSLLVQYrr,        X86::VPSLLVQYrm,         0 },
+    { X86::VPSRADYrr,         X86::VPSRADYrm,          0 },
+    { X86::VPSRAWYrr,         X86::VPSRAWYrm,          0 },
+    { X86::VPSRAVDrr,         X86::VPSRAVDrm,          0 },
+    { X86::VPSRAVDYrr,        X86::VPSRAVDYrm,         0 },
+    { X86::VPSRLDYrr,         X86::VPSRLDYrm,          0 },
+    { X86::VPSRLQYrr,         X86::VPSRLQYrm,          0 },
+    { X86::VPSRLWYrr,         X86::VPSRLWYrm,          0 },
+    { X86::VPSRLVDrr,         X86::VPSRLVDrm,          0 },
+    { X86::VPSRLVDYrr,        X86::VPSRLVDYrm,         0 },
+    { X86::VPSRLVQrr,         X86::VPSRLVQrm,          0 },
+    { X86::VPSRLVQYrr,        X86::VPSRLVQYrm,         0 },
+    { X86::VPSUBBYrr,         X86::VPSUBBYrm,          0 },
+    { X86::VPSUBDYrr,         X86::VPSUBDYrm,          0 },
+    { X86::VPSUBQYrr,         X86::VPSUBQYrm,          0 },
+    { X86::VPSUBSBYrr,        X86::VPSUBSBYrm,         0 },
+    { X86::VPSUBSWYrr,        X86::VPSUBSWYrm,         0 },
+    { X86::VPSUBUSBYrr,       X86::VPSUBUSBYrm,        0 },
+    { X86::VPSUBUSWYrr,       X86::VPSUBUSWYrm,        0 },
+    { X86::VPSUBWYrr,         X86::VPSUBWYrm,          0 },
+    { X86::VPUNPCKHBWYrr,     X86::VPUNPCKHBWYrm,      0 },
+    { X86::VPUNPCKHDQYrr,     X86::VPUNPCKHDQYrm,      0 },
+    { X86::VPUNPCKHQDQYrr,    X86::VPUNPCKHQDQYrm,     0 },
+    { X86::VPUNPCKHWDYrr,     X86::VPUNPCKHWDYrm,      0 },
+    { X86::VPUNPCKLBWYrr,     X86::VPUNPCKLBWYrm,      0 },
+    { X86::VPUNPCKLDQYrr,     X86::VPUNPCKLDQYrm,      0 },
+    { X86::VPUNPCKLQDQYrr,    X86::VPUNPCKLQDQYrm,     0 },
+    { X86::VPUNPCKLWDYrr,     X86::VPUNPCKLWDYrm,      0 },
+    { X86::VPXORYrr,          X86::VPXORYrm,           0 },
+
+    // FMA4 foldable patterns
+    { X86::VFMADDSS4rr,       X86::VFMADDSS4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDSS4rr_Int,   X86::VFMADDSS4mr_Int,    TB_NO_REVERSE },
+    { X86::VFMADDSD4rr,       X86::VFMADDSD4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDSD4rr_Int,   X86::VFMADDSD4mr_Int,    TB_NO_REVERSE },
+    { X86::VFMADDPS4rr,       X86::VFMADDPS4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDPD4rr,       X86::VFMADDPD4mr,        TB_ALIGN_NONE },
+    { X86::VFMADDPS4Yrr,      X86::VFMADDPS4Ymr,       TB_ALIGN_NONE },
+    { X86::VFMADDPD4Yrr,      X86::VFMADDPD4Ymr,       TB_ALIGN_NONE },
+    { X86::VFNMADDSS4rr,      X86::VFNMADDSS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDSS4rr_Int,  X86::VFNMADDSS4mr_Int,   TB_NO_REVERSE },
+    { X86::VFNMADDSD4rr,      X86::VFNMADDSD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDSD4rr_Int,  X86::VFNMADDSD4mr_Int,   TB_NO_REVERSE },
+    { X86::VFNMADDPS4rr,      X86::VFNMADDPS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDPD4rr,      X86::VFNMADDPD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMADDPS4Yrr,     X86::VFNMADDPS4Ymr,      TB_ALIGN_NONE },
+    { X86::VFNMADDPD4Yrr,     X86::VFNMADDPD4Ymr,      TB_ALIGN_NONE },
+    { X86::VFMSUBSS4rr,       X86::VFMSUBSS4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBSS4rr_Int,   X86::VFMSUBSS4mr_Int,    TB_NO_REVERSE },
+    { X86::VFMSUBSD4rr,       X86::VFMSUBSD4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBSD4rr_Int,   X86::VFMSUBSD4mr_Int,    TB_NO_REVERSE },
+    { X86::VFMSUBPS4rr,       X86::VFMSUBPS4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBPD4rr,       X86::VFMSUBPD4mr,        TB_ALIGN_NONE },
+    { X86::VFMSUBPS4Yrr,      X86::VFMSUBPS4Ymr,       TB_ALIGN_NONE },
+    { X86::VFMSUBPD4Yrr,      X86::VFMSUBPD4Ymr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBSS4rr,      X86::VFNMSUBSS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBSS4rr_Int,  X86::VFNMSUBSS4mr_Int,   TB_NO_REVERSE },
+    { X86::VFNMSUBSD4rr,      X86::VFNMSUBSD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBSD4rr_Int,  X86::VFNMSUBSD4mr_Int,   TB_NO_REVERSE },
+    { X86::VFNMSUBPS4rr,      X86::VFNMSUBPS4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4rr,      X86::VFNMSUBPD4mr,       TB_ALIGN_NONE },
+    { X86::VFNMSUBPS4Yrr,     X86::VFNMSUBPS4Ymr,      TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4Yrr,     X86::VFNMSUBPD4Ymr,      TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4rr,    X86::VFMADDSUBPS4mr,     TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4rr,    X86::VFMADDSUBPD4mr,     TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4Yrr,   X86::VFMADDSUBPS4Ymr,    TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4Yrr,   X86::VFMADDSUBPD4Ymr,    TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4rr,    X86::VFMSUBADDPS4mr,     TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4rr,    X86::VFMSUBADDPD4mr,     TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4Yrr,   X86::VFMSUBADDPS4Ymr,    TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4Yrr,   X86::VFMSUBADDPD4Ymr,    TB_ALIGN_NONE },
+
+    // XOP foldable instructions
+    { X86::VPCMOVrrr,         X86::VPCMOVrmr,           0 },
+    { X86::VPCMOVYrrr,        X86::VPCMOVYrmr,          0 },
+    { X86::VPCOMBri,          X86::VPCOMBmi,            0 },
+    { X86::VPCOMDri,          X86::VPCOMDmi,            0 },
+    { X86::VPCOMQri,          X86::VPCOMQmi,            0 },
+    { X86::VPCOMWri,          X86::VPCOMWmi,            0 },
+    { X86::VPCOMUBri,         X86::VPCOMUBmi,           0 },
+    { X86::VPCOMUDri,         X86::VPCOMUDmi,           0 },
+    { X86::VPCOMUQri,         X86::VPCOMUQmi,           0 },
+    { X86::VPCOMUWri,         X86::VPCOMUWmi,           0 },
+    { X86::VPERMIL2PDrr,      X86::VPERMIL2PDmr,        0 },
+    { X86::VPERMIL2PDYrr,     X86::VPERMIL2PDYmr,       0 },
+    { X86::VPERMIL2PSrr,      X86::VPERMIL2PSmr,        0 },
+    { X86::VPERMIL2PSYrr,     X86::VPERMIL2PSYmr,       0 },
+    { X86::VPMACSDDrr,        X86::VPMACSDDrm,          0 },
+    { X86::VPMACSDQHrr,       X86::VPMACSDQHrm,         0 },
+    { X86::VPMACSDQLrr,       X86::VPMACSDQLrm,         0 },
+    { X86::VPMACSSDDrr,       X86::VPMACSSDDrm,         0 },
+    { X86::VPMACSSDQHrr,      X86::VPMACSSDQHrm,        0 },
+    { X86::VPMACSSDQLrr,      X86::VPMACSSDQLrm,        0 },
+    { X86::VPMACSSWDrr,       X86::VPMACSSWDrm,         0 },
+    { X86::VPMACSSWWrr,       X86::VPMACSSWWrm,         0 },
+    { X86::VPMACSWDrr,        X86::VPMACSWDrm,          0 },
+    { X86::VPMACSWWrr,        X86::VPMACSWWrm,          0 },
+    { X86::VPMADCSSWDrr,      X86::VPMADCSSWDrm,        0 },
+    { X86::VPMADCSWDrr,       X86::VPMADCSWDrm,         0 },
+    { X86::VPPERMrrr,         X86::VPPERMrmr,           0 },
+    { X86::VPROTBrr,          X86::VPROTBrm,            0 },
+    { X86::VPROTDrr,          X86::VPROTDrm,            0 },
+    { X86::VPROTQrr,          X86::VPROTQrm,            0 },
+    { X86::VPROTWrr,          X86::VPROTWrm,            0 },
+    { X86::VPSHABrr,          X86::VPSHABrm,            0 },
+    { X86::VPSHADrr,          X86::VPSHADrm,            0 },
+    { X86::VPSHAQrr,          X86::VPSHAQrm,            0 },
+    { X86::VPSHAWrr,          X86::VPSHAWrm,            0 },
+    { X86::VPSHLBrr,          X86::VPSHLBrm,            0 },
+    { X86::VPSHLDrr,          X86::VPSHLDrm,            0 },
+    { X86::VPSHLQrr,          X86::VPSHLQrm,            0 },
+    { X86::VPSHLWrr,          X86::VPSHLWrm,            0 },
+
+    // BMI/BMI2 foldable instructions
+    { X86::ANDN32rr,          X86::ANDN32rm,            0 },
+    { X86::ANDN64rr,          X86::ANDN64rm,            0 },
+    { X86::MULX32rr,          X86::MULX32rm,            0 },
+    { X86::MULX64rr,          X86::MULX64rm,            0 },
+    { X86::PDEP32rr,          X86::PDEP32rm,            0 },
+    { X86::PDEP64rr,          X86::PDEP64rm,            0 },
+    { X86::PEXT32rr,          X86::PEXT32rm,            0 },
+    { X86::PEXT64rr,          X86::PEXT64rm,            0 },
+
+    // ADX foldable instructions
+    { X86::ADCX32rr,          X86::ADCX32rm,            0 },
+    { X86::ADCX64rr,          X86::ADCX64rm,            0 },
+    { X86::ADOX32rr,          X86::ADOX32rm,            0 },
+    { X86::ADOX64rr,          X86::ADOX64rm,            0 },
+
+    // AVX-512 foldable instructions
+    { X86::VADDPDZrr,         X86::VADDPDZrm,           0 },
+    { X86::VADDPSZrr,         X86::VADDPSZrm,           0 },
+    { X86::VADDSDZrr,         X86::VADDSDZrm,           0 },
+    { X86::VADDSDZrr_Int,     X86::VADDSDZrm_Int,       TB_NO_REVERSE },
+    { X86::VADDSSZrr,         X86::VADDSSZrm,           0 },
+    { X86::VADDSSZrr_Int,     X86::VADDSSZrm_Int,       TB_NO_REVERSE },
+    { X86::VALIGNDZrri,       X86::VALIGNDZrmi,         0 },
+    { X86::VALIGNQZrri,       X86::VALIGNQZrmi,         0 },
+    { X86::VANDNPDZrr,        X86::VANDNPDZrm,          0 },
+    { X86::VANDNPSZrr,        X86::VANDNPSZrm,          0 },
+    { X86::VANDPDZrr,         X86::VANDPDZrm,           0 },
+    { X86::VANDPSZrr,         X86::VANDPSZrm,           0 },
+    { X86::VCMPPDZrri,        X86::VCMPPDZrmi,          0 },
+    { X86::VCMPPSZrri,        X86::VCMPPSZrmi,          0 },
+    { X86::VCMPSDZrr,         X86::VCMPSDZrm,           0 },
+    { X86::VCMPSDZrr_Int,     X86::VCMPSDZrm_Int,       TB_NO_REVERSE },
+    { X86::VCMPSSZrr,         X86::VCMPSSZrm,           0 },
+    { X86::VCMPSSZrr_Int,     X86::VCMPSSZrm_Int,       TB_NO_REVERSE },
+    { X86::VDIVPDZrr,         X86::VDIVPDZrm,           0 },
+    { X86::VDIVPSZrr,         X86::VDIVPSZrm,           0 },
+    { X86::VDIVSDZrr,         X86::VDIVSDZrm,           0 },
+    { X86::VDIVSDZrr_Int,     X86::VDIVSDZrm_Int,       TB_NO_REVERSE },
+    { X86::VDIVSSZrr,         X86::VDIVSSZrm,           0 },
+    { X86::VDIVSSZrr_Int,     X86::VDIVSSZrm_Int,       TB_NO_REVERSE },
+    { X86::VINSERTF32x4Zrr,   X86::VINSERTF32x4Zrm,     0 },
+    { X86::VINSERTF32x8Zrr,   X86::VINSERTF32x8Zrm,     0 },
+    { X86::VINSERTF64x2Zrr,   X86::VINSERTF64x2Zrm,     0 },
+    { X86::VINSERTF64x4Zrr,   X86::VINSERTF64x4Zrm,     0 },
+    { X86::VINSERTI32x4Zrr,   X86::VINSERTI32x4Zrm,     0 },
+    { X86::VINSERTI32x8Zrr,   X86::VINSERTI32x8Zrm,     0 },
+    { X86::VINSERTI64x2Zrr,   X86::VINSERTI64x2Zrm,     0 },
+    { X86::VINSERTI64x4Zrr,   X86::VINSERTI64x4Zrm,     0 },
+    { X86::VMAXCPDZrr,        X86::VMAXCPDZrm,          0 },
+    { X86::VMAXCPSZrr,        X86::VMAXCPSZrm,          0 },
+    { X86::VMAXCSDZrr,        X86::VMAXCSDZrm,          0 },
+    { X86::VMAXCSSZrr,        X86::VMAXCSSZrm,          0 },
+    { X86::VMAXPDZrr,         X86::VMAXPDZrm,           0 },
+    { X86::VMAXPSZrr,         X86::VMAXPSZrm,           0 },
+    { X86::VMAXSDZrr,         X86::VMAXSDZrm,           0 },
+    { X86::VMAXSDZrr_Int,     X86::VMAXSDZrm_Int,       TB_NO_REVERSE },
+    { X86::VMAXSSZrr,         X86::VMAXSSZrm,           0 },
+    { X86::VMAXSSZrr_Int,     X86::VMAXSSZrm_Int,       TB_NO_REVERSE },
+    { X86::VMINCPDZrr,        X86::VMINCPDZrm,          0 },
+    { X86::VMINCPSZrr,        X86::VMINCPSZrm,          0 },
+    { X86::VMINCSDZrr,        X86::VMINCSDZrm,          0 },
+    { X86::VMINCSSZrr,        X86::VMINCSSZrm,          0 },
+    { X86::VMINPDZrr,         X86::VMINPDZrm,           0 },
+    { X86::VMINPSZrr,         X86::VMINPSZrm,           0 },
+    { X86::VMINSDZrr,         X86::VMINSDZrm,           0 },
+    { X86::VMINSDZrr_Int,     X86::VMINSDZrm_Int,       TB_NO_REVERSE },
+    { X86::VMINSSZrr,         X86::VMINSSZrm,           0 },
+    { X86::VMINSSZrr_Int,     X86::VMINSSZrm_Int,       TB_NO_REVERSE },
+    { X86::VMOVLHPSZrr,       X86::VMOVHPSZ128rm,       TB_NO_REVERSE },
+    { X86::VMULPDZrr,         X86::VMULPDZrm,           0 },
+    { X86::VMULPSZrr,         X86::VMULPSZrm,           0 },
+    { X86::VMULSDZrr,         X86::VMULSDZrm,           0 },
+    { X86::VMULSDZrr_Int,     X86::VMULSDZrm_Int,       TB_NO_REVERSE },
+    { X86::VMULSSZrr,         X86::VMULSSZrm,           0 },
+    { X86::VMULSSZrr_Int,     X86::VMULSSZrm_Int,       TB_NO_REVERSE },
+    { X86::VORPDZrr,          X86::VORPDZrm,            0 },
+    { X86::VORPSZrr,          X86::VORPSZrm,            0 },
+    { X86::VPACKSSDWZrr,      X86::VPACKSSDWZrm,        0 },
+    { X86::VPACKSSWBZrr,      X86::VPACKSSWBZrm,        0 },
+    { X86::VPACKUSDWZrr,      X86::VPACKUSDWZrm,        0 },
+    { X86::VPACKUSWBZrr,      X86::VPACKUSWBZrm,        0 },
+    { X86::VPADDBZrr,         X86::VPADDBZrm,           0 },
+    { X86::VPADDDZrr,         X86::VPADDDZrm,           0 },
+    { X86::VPADDQZrr,         X86::VPADDQZrm,           0 },
+    { X86::VPADDSBZrr,        X86::VPADDSBZrm,          0 },
+    { X86::VPADDSWZrr,        X86::VPADDSWZrm,          0 },
+    { X86::VPADDUSBZrr,       X86::VPADDUSBZrm,         0 },
+    { X86::VPADDUSWZrr,       X86::VPADDUSWZrm,         0 },
+    { X86::VPADDWZrr,         X86::VPADDWZrm,           0 },
+    { X86::VPALIGNRZrri,      X86::VPALIGNRZrmi,        0 },
+    { X86::VPANDDZrr,         X86::VPANDDZrm,           0 },
+    { X86::VPANDNDZrr,        X86::VPANDNDZrm,          0 },
+    { X86::VPANDNQZrr,        X86::VPANDNQZrm,          0 },
+    { X86::VPANDQZrr,         X86::VPANDQZrm,           0 },
+    { X86::VPAVGBZrr,         X86::VPAVGBZrm,           0 },
+    { X86::VPAVGWZrr,         X86::VPAVGWZrm,           0 },
+    { X86::VPCMPBZrri,        X86::VPCMPBZrmi,          0 },
+    { X86::VPCMPDZrri,        X86::VPCMPDZrmi,          0 },
+    { X86::VPCMPEQBZrr,       X86::VPCMPEQBZrm,         0 },
+    { X86::VPCMPEQDZrr,       X86::VPCMPEQDZrm,         0 },
+    { X86::VPCMPEQQZrr,       X86::VPCMPEQQZrm,         0 },
+    { X86::VPCMPEQWZrr,       X86::VPCMPEQWZrm,         0 },
+    { X86::VPCMPGTBZrr,       X86::VPCMPGTBZrm,         0 },
+    { X86::VPCMPGTDZrr,       X86::VPCMPGTDZrm,         0 },
+    { X86::VPCMPGTQZrr,       X86::VPCMPGTQZrm,         0 },
+    { X86::VPCMPGTWZrr,       X86::VPCMPGTWZrm,         0 },
+    { X86::VPCMPQZrri,        X86::VPCMPQZrmi,          0 },
+    { X86::VPCMPUBZrri,       X86::VPCMPUBZrmi,         0 },
+    { X86::VPCMPUDZrri,       X86::VPCMPUDZrmi,         0 },
+    { X86::VPCMPUQZrri,       X86::VPCMPUQZrmi,         0 },
+    { X86::VPCMPUWZrri,       X86::VPCMPUWZrmi,         0 },
+    { X86::VPCMPWZrri,        X86::VPCMPWZrmi,          0 },
+    { X86::VPERMBZrr,         X86::VPERMBZrm,           0 },
+    { X86::VPERMDZrr,         X86::VPERMDZrm,           0 },
+    { X86::VPERMILPDZrr,      X86::VPERMILPDZrm,        0 },
+    { X86::VPERMILPSZrr,      X86::VPERMILPSZrm,        0 },
+    { X86::VPERMPDZrr,        X86::VPERMPDZrm,          0 },
+    { X86::VPERMPSZrr,        X86::VPERMPSZrm,          0 },
+    { X86::VPERMQZrr,         X86::VPERMQZrm,           0 },
+    { X86::VPERMWZrr,         X86::VPERMWZrm,           0 },
+    { X86::VPINSRBZrr,        X86::VPINSRBZrm,          0 },
+    { X86::VPINSRDZrr,        X86::VPINSRDZrm,          0 },
+    { X86::VPINSRQZrr,        X86::VPINSRQZrm,          0 },
+    { X86::VPINSRWZrr,        X86::VPINSRWZrm,          0 },
+    { X86::VPMADDUBSWZrr,     X86::VPMADDUBSWZrm,       0 },
+    { X86::VPMADDWDZrr,       X86::VPMADDWDZrm,         0 },
+    { X86::VPMAXSBZrr,        X86::VPMAXSBZrm,          0 },
+    { X86::VPMAXSDZrr,        X86::VPMAXSDZrm,          0 },
+    { X86::VPMAXSQZrr,        X86::VPMAXSQZrm,          0 },
+    { X86::VPMAXSWZrr,        X86::VPMAXSWZrm,          0 },
+    { X86::VPMAXUBZrr,        X86::VPMAXUBZrm,          0 },
+    { X86::VPMAXUDZrr,        X86::VPMAXUDZrm,          0 },
+    { X86::VPMAXUQZrr,        X86::VPMAXUQZrm,          0 },
+    { X86::VPMAXUWZrr,        X86::VPMAXUWZrm,          0 },
+    { X86::VPMINSBZrr,        X86::VPMINSBZrm,          0 },
+    { X86::VPMINSDZrr,        X86::VPMINSDZrm,          0 },
+    { X86::VPMINSQZrr,        X86::VPMINSQZrm,          0 },
+    { X86::VPMINSWZrr,        X86::VPMINSWZrm,          0 },
+    { X86::VPMINUBZrr,        X86::VPMINUBZrm,          0 },
+    { X86::VPMINUDZrr,        X86::VPMINUDZrm,          0 },
+    { X86::VPMINUQZrr,        X86::VPMINUQZrm,          0 },
+    { X86::VPMINUWZrr,        X86::VPMINUWZrm,          0 },
+    { X86::VPMULDQZrr,        X86::VPMULDQZrm,          0 },
+    { X86::VPMULLDZrr,        X86::VPMULLDZrm,          0 },
+    { X86::VPMULLQZrr,        X86::VPMULLQZrm,          0 },
+    { X86::VPMULLWZrr,        X86::VPMULLWZrm,          0 },
+    { X86::VPMULUDQZrr,       X86::VPMULUDQZrm,         0 },
+    { X86::VPORDZrr,          X86::VPORDZrm,            0 },
+    { X86::VPORQZrr,          X86::VPORQZrm,            0 },
+    { X86::VPSADBWZ512rr,     X86::VPSADBWZ512rm,       0 },
+    { X86::VPSHUFBZrr,        X86::VPSHUFBZrm,          0 },
+    { X86::VPSLLDZrr,         X86::VPSLLDZrm,           0 },
+    { X86::VPSLLQZrr,         X86::VPSLLQZrm,           0 },
+    { X86::VPSLLVDZrr,        X86::VPSLLVDZrm,          0 },
+    { X86::VPSLLVQZrr,        X86::VPSLLVQZrm,          0 },
+    { X86::VPSLLVWZrr,        X86::VPSLLVWZrm,          0 },
+    { X86::VPSLLWZrr,         X86::VPSLLWZrm,           0 },
+    { X86::VPSRADZrr,         X86::VPSRADZrm,           0 },
+    { X86::VPSRAQZrr,         X86::VPSRAQZrm,           0 },
+    { X86::VPSRAVDZrr,        X86::VPSRAVDZrm,          0 },
+    { X86::VPSRAVQZrr,        X86::VPSRAVQZrm,          0 },
+    { X86::VPSRAVWZrr,        X86::VPSRAVWZrm,          0 },
+    { X86::VPSRAWZrr,         X86::VPSRAWZrm,           0 },
+    { X86::VPSRLDZrr,         X86::VPSRLDZrm,           0 },
+    { X86::VPSRLQZrr,         X86::VPSRLQZrm,           0 },
+    { X86::VPSRLVDZrr,        X86::VPSRLVDZrm,          0 },
+    { X86::VPSRLVQZrr,        X86::VPSRLVQZrm,          0 },
+    { X86::VPSRLVWZrr,        X86::VPSRLVWZrm,          0 },
+    { X86::VPSRLWZrr,         X86::VPSRLWZrm,           0 },
+    { X86::VPSUBBZrr,         X86::VPSUBBZrm,           0 },
+    { X86::VPSUBDZrr,         X86::VPSUBDZrm,           0 },
+    { X86::VPSUBQZrr,         X86::VPSUBQZrm,           0 },
+    { X86::VPSUBSBZrr,        X86::VPSUBSBZrm,          0 },
+    { X86::VPSUBSWZrr,        X86::VPSUBSWZrm,          0 },
+    { X86::VPSUBUSBZrr,       X86::VPSUBUSBZrm,         0 },
+    { X86::VPSUBUSWZrr,       X86::VPSUBUSWZrm,         0 },
+    { X86::VPSUBWZrr,         X86::VPSUBWZrm,           0 },
+    { X86::VPUNPCKHBWZrr,     X86::VPUNPCKHBWZrm,       0 },
+    { X86::VPUNPCKHDQZrr,     X86::VPUNPCKHDQZrm,       0 },
+    { X86::VPUNPCKHQDQZrr,    X86::VPUNPCKHQDQZrm,      0 },
+    { X86::VPUNPCKHWDZrr,     X86::VPUNPCKHWDZrm,       0 },
+    { X86::VPUNPCKLBWZrr,     X86::VPUNPCKLBWZrm,       0 },
+    { X86::VPUNPCKLDQZrr,     X86::VPUNPCKLDQZrm,       0 },
+    { X86::VPUNPCKLQDQZrr,    X86::VPUNPCKLQDQZrm,      0 },
+    { X86::VPUNPCKLWDZrr,     X86::VPUNPCKLWDZrm,       0 },
+    { X86::VPXORDZrr,         X86::VPXORDZrm,           0 },
+    { X86::VPXORQZrr,         X86::VPXORQZrm,           0 },
+    { X86::VSHUFPDZrri,       X86::VSHUFPDZrmi,         0 },
+    { X86::VSHUFPSZrri,       X86::VSHUFPSZrmi,         0 },
+    { X86::VSUBPDZrr,         X86::VSUBPDZrm,           0 },
+    { X86::VSUBPSZrr,         X86::VSUBPSZrm,           0 },
+    { X86::VSUBSDZrr,         X86::VSUBSDZrm,           0 },
+    { X86::VSUBSDZrr_Int,     X86::VSUBSDZrm_Int,       TB_NO_REVERSE },
+    { X86::VSUBSSZrr,         X86::VSUBSSZrm,           0 },
+    { X86::VSUBSSZrr_Int,     X86::VSUBSSZrm_Int,       TB_NO_REVERSE },
+    { X86::VUNPCKHPDZrr,      X86::VUNPCKHPDZrm,        0 },
+    { X86::VUNPCKHPSZrr,      X86::VUNPCKHPSZrm,        0 },
+    { X86::VUNPCKLPDZrr,      X86::VUNPCKLPDZrm,        0 },
+    { X86::VUNPCKLPSZrr,      X86::VUNPCKLPSZrm,        0 },
+    { X86::VXORPDZrr,         X86::VXORPDZrm,           0 },
+    { X86::VXORPSZrr,         X86::VXORPSZrm,           0 },
+
+    // AVX-512{F,VL} foldable instructions
+    { X86::VADDPDZ128rr,      X86::VADDPDZ128rm,        0 },
+    { X86::VADDPDZ256rr,      X86::VADDPDZ256rm,        0 },
+    { X86::VADDPSZ128rr,      X86::VADDPSZ128rm,        0 },
+    { X86::VADDPSZ256rr,      X86::VADDPSZ256rm,        0 },
+    { X86::VALIGNDZ128rri,    X86::VALIGNDZ128rmi,      0 },
+    { X86::VALIGNDZ256rri,    X86::VALIGNDZ256rmi,      0 },
+    { X86::VALIGNQZ128rri,    X86::VALIGNQZ128rmi,      0 },
+    { X86::VALIGNQZ256rri,    X86::VALIGNQZ256rmi,      0 },
+    { X86::VANDNPDZ128rr,     X86::VANDNPDZ128rm,       0 },
+    { X86::VANDNPDZ256rr,     X86::VANDNPDZ256rm,       0 },
+    { X86::VANDNPSZ128rr,     X86::VANDNPSZ128rm,       0 },
+    { X86::VANDNPSZ256rr,     X86::VANDNPSZ256rm,       0 },
+    { X86::VANDPDZ128rr,      X86::VANDPDZ128rm,        0 },
+    { X86::VANDPDZ256rr,      X86::VANDPDZ256rm,        0 },
+    { X86::VANDPSZ128rr,      X86::VANDPSZ128rm,        0 },
+    { X86::VANDPSZ256rr,      X86::VANDPSZ256rm,        0 },
+    { X86::VCMPPDZ128rri,     X86::VCMPPDZ128rmi,       0 },
+    { X86::VCMPPDZ256rri,     X86::VCMPPDZ256rmi,       0 },
+    { X86::VCMPPSZ128rri,     X86::VCMPPSZ128rmi,       0 },
+    { X86::VCMPPSZ256rri,     X86::VCMPPSZ256rmi,       0 },
+    { X86::VDIVPDZ128rr,      X86::VDIVPDZ128rm,        0 },
+    { X86::VDIVPDZ256rr,      X86::VDIVPDZ256rm,        0 },
+    { X86::VDIVPSZ128rr,      X86::VDIVPSZ128rm,        0 },
+    { X86::VDIVPSZ256rr,      X86::VDIVPSZ256rm,        0 },
+    { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm,  0 },
+    { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm,  0 },
+    { X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm,  0 },
+    { X86::VINSERTI64x2Z256rr,X86::VINSERTI64x2Z256rm,  0 },
+    { X86::VMAXCPDZ128rr,     X86::VMAXCPDZ128rm,       0 },
+    { X86::VMAXCPDZ256rr,     X86::VMAXCPDZ256rm,       0 },
+    { X86::VMAXCPSZ128rr,     X86::VMAXCPSZ128rm,       0 },
+    { X86::VMAXCPSZ256rr,     X86::VMAXCPSZ256rm,       0 },
+    { X86::VMAXPDZ128rr,      X86::VMAXPDZ128rm,        0 },
+    { X86::VMAXPDZ256rr,      X86::VMAXPDZ256rm,        0 },
+    { X86::VMAXPSZ128rr,      X86::VMAXPSZ128rm,        0 },
+    { X86::VMAXPSZ256rr,      X86::VMAXPSZ256rm,        0 },
+    { X86::VMINCPDZ128rr,     X86::VMINCPDZ128rm,       0 },
+    { X86::VMINCPDZ256rr,     X86::VMINCPDZ256rm,       0 },
+    { X86::VMINCPSZ128rr,     X86::VMINCPSZ128rm,       0 },
+    { X86::VMINCPSZ256rr,     X86::VMINCPSZ256rm,       0 },
+    { X86::VMINPDZ128rr,      X86::VMINPDZ128rm,        0 },
+    { X86::VMINPDZ256rr,      X86::VMINPDZ256rm,        0 },
+    { X86::VMINPSZ128rr,      X86::VMINPSZ128rm,        0 },
+    { X86::VMINPSZ256rr,      X86::VMINPSZ256rm,        0 },
+    { X86::VMULPDZ128rr,      X86::VMULPDZ128rm,        0 },
+    { X86::VMULPDZ256rr,      X86::VMULPDZ256rm,        0 },
+    { X86::VMULPSZ128rr,      X86::VMULPSZ128rm,        0 },
+    { X86::VMULPSZ256rr,      X86::VMULPSZ256rm,        0 },
+    { X86::VORPDZ128rr,       X86::VORPDZ128rm,         0 },
+    { X86::VORPDZ256rr,       X86::VORPDZ256rm,         0 },
+    { X86::VORPSZ128rr,       X86::VORPSZ128rm,         0 },
+    { X86::VORPSZ256rr,       X86::VORPSZ256rm,         0 },
+    { X86::VPACKSSDWZ256rr,   X86::VPACKSSDWZ256rm,     0 },
+    { X86::VPACKSSDWZ128rr,   X86::VPACKSSDWZ128rm,     0 },
+    { X86::VPACKSSWBZ256rr,   X86::VPACKSSWBZ256rm,     0 },
+    { X86::VPACKSSWBZ128rr,   X86::VPACKSSWBZ128rm,     0 },
+    { X86::VPACKUSDWZ256rr,   X86::VPACKUSDWZ256rm,     0 },
+    { X86::VPACKUSDWZ128rr,   X86::VPACKUSDWZ128rm,     0 },
+    { X86::VPACKUSWBZ256rr,   X86::VPACKUSWBZ256rm,     0 },
+    { X86::VPACKUSWBZ128rr,   X86::VPACKUSWBZ128rm,     0 },
+    { X86::VPADDBZ128rr,      X86::VPADDBZ128rm,        0 },
+    { X86::VPADDBZ256rr,      X86::VPADDBZ256rm,        0 },
+    { X86::VPADDDZ128rr,      X86::VPADDDZ128rm,        0 },
+    { X86::VPADDDZ256rr,      X86::VPADDDZ256rm,        0 },
+    { X86::VPADDQZ128rr,      X86::VPADDQZ128rm,        0 },
+    { X86::VPADDQZ256rr,      X86::VPADDQZ256rm,        0 },
+    { X86::VPADDSBZ128rr,     X86::VPADDSBZ128rm,       0 },
+    { X86::VPADDSBZ256rr,     X86::VPADDSBZ256rm,       0 },
+    { X86::VPADDSWZ128rr,     X86::VPADDSWZ128rm,       0 },
+    { X86::VPADDSWZ256rr,     X86::VPADDSWZ256rm,       0 },
+    { X86::VPADDUSBZ128rr,    X86::VPADDUSBZ128rm,      0 },
+    { X86::VPADDUSBZ256rr,    X86::VPADDUSBZ256rm,      0 },
+    { X86::VPADDUSWZ128rr,    X86::VPADDUSWZ128rm,      0 },
+    { X86::VPADDUSWZ256rr,    X86::VPADDUSWZ256rm,      0 },
+    { X86::VPADDWZ128rr,      X86::VPADDWZ128rm,        0 },
+    { X86::VPADDWZ256rr,      X86::VPADDWZ256rm,        0 },
+    { X86::VPALIGNRZ128rri,   X86::VPALIGNRZ128rmi,     0 },
+    { X86::VPALIGNRZ256rri,   X86::VPALIGNRZ256rmi,     0 },
+    { X86::VPANDDZ128rr,      X86::VPANDDZ128rm,        0 },
+    { X86::VPANDDZ256rr,      X86::VPANDDZ256rm,        0 },
+    { X86::VPANDNDZ128rr,     X86::VPANDNDZ128rm,       0 },
+    { X86::VPANDNDZ256rr,     X86::VPANDNDZ256rm,       0 },
+    { X86::VPANDNQZ128rr,     X86::VPANDNQZ128rm,       0 },
+    { X86::VPANDNQZ256rr,     X86::VPANDNQZ256rm,       0 },
+    { X86::VPANDQZ128rr,      X86::VPANDQZ128rm,        0 },
+    { X86::VPANDQZ256rr,      X86::VPANDQZ256rm,        0 },
+    { X86::VPAVGBZ128rr,      X86::VPAVGBZ128rm,        0 },
+    { X86::VPAVGBZ256rr,      X86::VPAVGBZ256rm,        0 },
+    { X86::VPAVGWZ128rr,      X86::VPAVGWZ128rm,        0 },
+    { X86::VPAVGWZ256rr,      X86::VPAVGWZ256rm,        0 },
+    { X86::VPCMPBZ128rri,     X86::VPCMPBZ128rmi,       0 },
+    { X86::VPCMPBZ256rri,     X86::VPCMPBZ256rmi,       0 },
+    { X86::VPCMPDZ128rri,     X86::VPCMPDZ128rmi,       0 },
+    { X86::VPCMPDZ256rri,     X86::VPCMPDZ256rmi,       0 },
+    { X86::VPCMPEQBZ128rr,    X86::VPCMPEQBZ128rm,      0 },
+    { X86::VPCMPEQBZ256rr,    X86::VPCMPEQBZ256rm,      0 },
+    { X86::VPCMPEQDZ128rr,    X86::VPCMPEQDZ128rm,      0 },
+    { X86::VPCMPEQDZ256rr,    X86::VPCMPEQDZ256rm,      0 },
+    { X86::VPCMPEQQZ128rr,    X86::VPCMPEQQZ128rm,      0 },
+    { X86::VPCMPEQQZ256rr,    X86::VPCMPEQQZ256rm,      0 },
+    { X86::VPCMPEQWZ128rr,    X86::VPCMPEQWZ128rm,      0 },
+    { X86::VPCMPEQWZ256rr,    X86::VPCMPEQWZ256rm,      0 },
+    { X86::VPCMPGTBZ128rr,    X86::VPCMPGTBZ128rm,      0 },
+    { X86::VPCMPGTBZ256rr,    X86::VPCMPGTBZ256rm,      0 },
+    { X86::VPCMPGTDZ128rr,    X86::VPCMPGTDZ128rm,      0 },
+    { X86::VPCMPGTDZ256rr,    X86::VPCMPGTDZ256rm,      0 },
+    { X86::VPCMPGTQZ128rr,    X86::VPCMPGTQZ128rm,      0 },
+    { X86::VPCMPGTQZ256rr,    X86::VPCMPGTQZ256rm,      0 },
+    { X86::VPCMPGTWZ128rr,    X86::VPCMPGTWZ128rm,      0 },
+    { X86::VPCMPGTWZ256rr,    X86::VPCMPGTWZ256rm,      0 },
+    { X86::VPCMPQZ128rri,     X86::VPCMPQZ128rmi,       0 },
+    { X86::VPCMPQZ256rri,     X86::VPCMPQZ256rmi,       0 },
+    { X86::VPCMPUBZ128rri,    X86::VPCMPUBZ128rmi,      0 },
+    { X86::VPCMPUBZ256rri,    X86::VPCMPUBZ256rmi,      0 },
+    { X86::VPCMPUDZ128rri,    X86::VPCMPUDZ128rmi,      0 },
+    { X86::VPCMPUDZ256rri,    X86::VPCMPUDZ256rmi,      0 },
+    { X86::VPCMPUQZ128rri,    X86::VPCMPUQZ128rmi,      0 },
+    { X86::VPCMPUQZ256rri,    X86::VPCMPUQZ256rmi,      0 },
+    { X86::VPCMPUWZ128rri,    X86::VPCMPUWZ128rmi,      0 },
+    { X86::VPCMPUWZ256rri,    X86::VPCMPUWZ256rmi,      0 },
+    { X86::VPCMPWZ128rri,     X86::VPCMPWZ128rmi,       0 },
+    { X86::VPCMPWZ256rri,     X86::VPCMPWZ256rmi,       0 },
+    { X86::VPERMBZ128rr,      X86::VPERMBZ128rm,        0 },
+    { X86::VPERMBZ256rr,      X86::VPERMBZ256rm,        0 },
+    { X86::VPERMDZ256rr,      X86::VPERMDZ256rm,        0 },
+    { X86::VPERMILPDZ128rr,   X86::VPERMILPDZ128rm,     0 },
+    { X86::VPERMILPDZ256rr,   X86::VPERMILPDZ256rm,     0 },
+    { X86::VPERMILPSZ128rr,   X86::VPERMILPSZ128rm,     0 },
+    { X86::VPERMILPSZ256rr,   X86::VPERMILPSZ256rm,     0 },
+    { X86::VPERMPDZ256rr,     X86::VPERMPDZ256rm,       0 },
+    { X86::VPERMPSZ256rr,     X86::VPERMPSZ256rm,       0 },
+    { X86::VPERMQZ256rr,      X86::VPERMQZ256rm,        0 },
+    { X86::VPERMWZ128rr,      X86::VPERMWZ128rm,        0 },
+    { X86::VPERMWZ256rr,      X86::VPERMWZ256rm,        0 },
+    { X86::VPMADDUBSWZ128rr,  X86::VPMADDUBSWZ128rm,    0 },
+    { X86::VPMADDUBSWZ256rr,  X86::VPMADDUBSWZ256rm,    0 },
+    { X86::VPMADDWDZ128rr,    X86::VPMADDWDZ128rm,      0 },
+    { X86::VPMADDWDZ256rr,    X86::VPMADDWDZ256rm,      0 },
+    { X86::VPMAXSBZ128rr,     X86::VPMAXSBZ128rm,       0 },
+    { X86::VPMAXSBZ256rr,     X86::VPMAXSBZ256rm,       0 },
+    { X86::VPMAXSDZ128rr,     X86::VPMAXSDZ128rm,       0 },
+    { X86::VPMAXSDZ256rr,     X86::VPMAXSDZ256rm,       0 },
+    { X86::VPMAXSQZ128rr,     X86::VPMAXSQZ128rm,       0 },
+    { X86::VPMAXSQZ256rr,     X86::VPMAXSQZ256rm,       0 },
+    { X86::VPMAXSWZ128rr,     X86::VPMAXSWZ128rm,       0 },
+    { X86::VPMAXSWZ256rr,     X86::VPMAXSWZ256rm,       0 },
+    { X86::VPMAXUBZ128rr,     X86::VPMAXUBZ128rm,       0 },
+    { X86::VPMAXUBZ256rr,     X86::VPMAXUBZ256rm,       0 },
+    { X86::VPMAXUDZ128rr,     X86::VPMAXUDZ128rm,       0 },
+    { X86::VPMAXUDZ256rr,     X86::VPMAXUDZ256rm,       0 },
+    { X86::VPMAXUQZ128rr,     X86::VPMAXUQZ128rm,       0 },
+    { X86::VPMAXUQZ256rr,     X86::VPMAXUQZ256rm,       0 },
+    { X86::VPMAXUWZ128rr,     X86::VPMAXUWZ128rm,       0 },
+    { X86::VPMAXUWZ256rr,     X86::VPMAXUWZ256rm,       0 },
+    { X86::VPMINSBZ128rr,     X86::VPMINSBZ128rm,       0 },
+    { X86::VPMINSBZ256rr,     X86::VPMINSBZ256rm,       0 },
+    { X86::VPMINSDZ128rr,     X86::VPMINSDZ128rm,       0 },
+    { X86::VPMINSDZ256rr,     X86::VPMINSDZ256rm,       0 },
+    { X86::VPMINSQZ128rr,     X86::VPMINSQZ128rm,       0 },
+    { X86::VPMINSQZ256rr,     X86::VPMINSQZ256rm,       0 },
+    { X86::VPMINSWZ128rr,     X86::VPMINSWZ128rm,       0 },
+    { X86::VPMINSWZ256rr,     X86::VPMINSWZ256rm,       0 },
+    { X86::VPMINUBZ128rr,     X86::VPMINUBZ128rm,       0 },
+    { X86::VPMINUBZ256rr,     X86::VPMINUBZ256rm,       0 },
+    { X86::VPMINUDZ128rr,     X86::VPMINUDZ128rm,       0 },
+    { X86::VPMINUDZ256rr,     X86::VPMINUDZ256rm,       0 },
+    { X86::VPMINUQZ128rr,     X86::VPMINUQZ128rm,       0 },
+    { X86::VPMINUQZ256rr,     X86::VPMINUQZ256rm,       0 },
+    { X86::VPMINUWZ128rr,     X86::VPMINUWZ128rm,       0 },
+    { X86::VPMINUWZ256rr,     X86::VPMINUWZ256rm,       0 },
+    { X86::VPMULDQZ128rr,     X86::VPMULDQZ128rm,       0 },
+    { X86::VPMULDQZ256rr,     X86::VPMULDQZ256rm,       0 },
+    { X86::VPMULLDZ128rr,     X86::VPMULLDZ128rm,       0 },
+    { X86::VPMULLDZ256rr,     X86::VPMULLDZ256rm,       0 },
+    { X86::VPMULLQZ128rr,     X86::VPMULLQZ128rm,       0 },
+    { X86::VPMULLQZ256rr,     X86::VPMULLQZ256rm,       0 },
+    { X86::VPMULLWZ128rr,     X86::VPMULLWZ128rm,       0 },
+    { X86::VPMULLWZ256rr,     X86::VPMULLWZ256rm,       0 },
+    { X86::VPMULUDQZ128rr,    X86::VPMULUDQZ128rm,      0 },
+    { X86::VPMULUDQZ256rr,    X86::VPMULUDQZ256rm,      0 },
+    { X86::VPORDZ128rr,       X86::VPORDZ128rm,         0 },
+    { X86::VPORDZ256rr,       X86::VPORDZ256rm,         0 },
+    { X86::VPORQZ128rr,       X86::VPORQZ128rm,         0 },
+    { X86::VPORQZ256rr,       X86::VPORQZ256rm,         0 },
+    { X86::VPSADBWZ128rr,     X86::VPSADBWZ128rm,       0 },
+    { X86::VPSADBWZ256rr,     X86::VPSADBWZ256rm,       0 },
+    { X86::VPSHUFBZ128rr,     X86::VPSHUFBZ128rm,       0 },
+    { X86::VPSHUFBZ256rr,     X86::VPSHUFBZ256rm,       0 },
+    { X86::VPSLLDZ128rr,      X86::VPSLLDZ128rm,        0 },
+    { X86::VPSLLDZ256rr,      X86::VPSLLDZ256rm,        0 },
+    { X86::VPSLLQZ128rr,      X86::VPSLLQZ128rm,        0 },
+    { X86::VPSLLQZ256rr,      X86::VPSLLQZ256rm,        0 },
+    { X86::VPSLLVDZ128rr,     X86::VPSLLVDZ128rm,       0 },
+    { X86::VPSLLVDZ256rr,     X86::VPSLLVDZ256rm,       0 },
+    { X86::VPSLLVQZ128rr,     X86::VPSLLVQZ128rm,       0 },
+    { X86::VPSLLVQZ256rr,     X86::VPSLLVQZ256rm,       0 },
+    { X86::VPSLLVWZ128rr,     X86::VPSLLVWZ128rm,       0 },
+    { X86::VPSLLVWZ256rr,     X86::VPSLLVWZ256rm,       0 },
+    { X86::VPSLLWZ128rr,      X86::VPSLLWZ128rm,        0 },
+    { X86::VPSLLWZ256rr,      X86::VPSLLWZ256rm,        0 },
+    { X86::VPSRADZ128rr,      X86::VPSRADZ128rm,        0 },
+    { X86::VPSRADZ256rr,      X86::VPSRADZ256rm,        0 },
+    { X86::VPSRAQZ128rr,      X86::VPSRAQZ128rm,        0 },
+    { X86::VPSRAQZ256rr,      X86::VPSRAQZ256rm,        0 },
+    { X86::VPSRAVDZ128rr,     X86::VPSRAVDZ128rm,       0 },
+    { X86::VPSRAVDZ256rr,     X86::VPSRAVDZ256rm,       0 },
+    { X86::VPSRAVQZ128rr,     X86::VPSRAVQZ128rm,       0 },
+    { X86::VPSRAVQZ256rr,     X86::VPSRAVQZ256rm,       0 },
+    { X86::VPSRAVWZ128rr,     X86::VPSRAVWZ128rm,       0 },
+    { X86::VPSRAVWZ256rr,     X86::VPSRAVWZ256rm,       0 },
+    { X86::VPSRAWZ128rr,      X86::VPSRAWZ128rm,        0 },
+    { X86::VPSRAWZ256rr,      X86::VPSRAWZ256rm,        0 },
+    { X86::VPSRLDZ128rr,      X86::VPSRLDZ128rm,        0 },
+    { X86::VPSRLDZ256rr,      X86::VPSRLDZ256rm,        0 },
+    { X86::VPSRLQZ128rr,      X86::VPSRLQZ128rm,        0 },
+    { X86::VPSRLQZ256rr,      X86::VPSRLQZ256rm,        0 },
+    { X86::VPSRLVDZ128rr,     X86::VPSRLVDZ128rm,       0 },
+    { X86::VPSRLVDZ256rr,     X86::VPSRLVDZ256rm,       0 },
+    { X86::VPSRLVQZ128rr,     X86::VPSRLVQZ128rm,       0 },
+    { X86::VPSRLVQZ256rr,     X86::VPSRLVQZ256rm,       0 },
+    { X86::VPSRLVWZ128rr,     X86::VPSRLVWZ128rm,       0 },
+    { X86::VPSRLVWZ256rr,     X86::VPSRLVWZ256rm,       0 },
+    { X86::VPSRLWZ128rr,      X86::VPSRLWZ128rm,        0 },
+    { X86::VPSRLWZ256rr,      X86::VPSRLWZ256rm,        0 },
+    { X86::VPSUBBZ128rr,      X86::VPSUBBZ128rm,        0 },
+    { X86::VPSUBBZ256rr,      X86::VPSUBBZ256rm,        0 },
+    { X86::VPSUBDZ128rr,      X86::VPSUBDZ128rm,        0 },
+    { X86::VPSUBDZ256rr,      X86::VPSUBDZ256rm,        0 },
+    { X86::VPSUBQZ128rr,      X86::VPSUBQZ128rm,        0 },
+    { X86::VPSUBQZ256rr,      X86::VPSUBQZ256rm,        0 },
+    { X86::VPSUBSBZ128rr,     X86::VPSUBSBZ128rm,       0 },
+    { X86::VPSUBSBZ256rr,     X86::VPSUBSBZ256rm,       0 },
+    { X86::VPSUBSWZ128rr,     X86::VPSUBSWZ128rm,       0 },
+    { X86::VPSUBSWZ256rr,     X86::VPSUBSWZ256rm,       0 },
+    { X86::VPSUBUSBZ128rr,    X86::VPSUBUSBZ128rm,      0 },
+    { X86::VPSUBUSBZ256rr,    X86::VPSUBUSBZ256rm,      0 },
+    { X86::VPSUBUSWZ128rr,    X86::VPSUBUSWZ128rm,      0 },
+    { X86::VPSUBUSWZ256rr,    X86::VPSUBUSWZ256rm,      0 },
+    { X86::VPSUBWZ128rr,      X86::VPSUBWZ128rm,        0 },
+    { X86::VPSUBWZ256rr,      X86::VPSUBWZ256rm,        0 },
+    { X86::VPUNPCKHBWZ128rr,  X86::VPUNPCKHBWZ128rm,    0 },
+    { X86::VPUNPCKHBWZ256rr,  X86::VPUNPCKHBWZ256rm,    0 },
+    { X86::VPUNPCKHDQZ128rr,  X86::VPUNPCKHDQZ128rm,    0 },
+    { X86::VPUNPCKHDQZ256rr,  X86::VPUNPCKHDQZ256rm,    0 },
+    { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm,   0 },
+    { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm,   0 },
+    { X86::VPUNPCKHWDZ128rr,  X86::VPUNPCKHWDZ128rm,    0 },
+    { X86::VPUNPCKHWDZ256rr,  X86::VPUNPCKHWDZ256rm,    0 },
+    { X86::VPUNPCKLBWZ128rr,  X86::VPUNPCKLBWZ128rm,    0 },
+    { X86::VPUNPCKLBWZ256rr,  X86::VPUNPCKLBWZ256rm,    0 },
+    { X86::VPUNPCKLDQZ128rr,  X86::VPUNPCKLDQZ128rm,    0 },
+    { X86::VPUNPCKLDQZ256rr,  X86::VPUNPCKLDQZ256rm,    0 },
+    { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm,   0 },
+    { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm,   0 },
+    { X86::VPUNPCKLWDZ128rr,  X86::VPUNPCKLWDZ128rm,    0 },
+    { X86::VPUNPCKLWDZ256rr,  X86::VPUNPCKLWDZ256rm,    0 },
+    { X86::VPXORDZ128rr,      X86::VPXORDZ128rm,        0 },
+    { X86::VPXORDZ256rr,      X86::VPXORDZ256rm,        0 },
+    { X86::VPXORQZ128rr,      X86::VPXORQZ128rm,        0 },
+    { X86::VPXORQZ256rr,      X86::VPXORQZ256rm,        0 },
+    { X86::VSHUFPDZ128rri,    X86::VSHUFPDZ128rmi,      0 },
+    { X86::VSHUFPDZ256rri,    X86::VSHUFPDZ256rmi,      0 },
+    { X86::VSHUFPSZ128rri,    X86::VSHUFPSZ128rmi,      0 },
+    { X86::VSHUFPSZ256rri,    X86::VSHUFPSZ256rmi,      0 },
+    { X86::VSUBPDZ128rr,      X86::VSUBPDZ128rm,        0 },
+    { X86::VSUBPDZ256rr,      X86::VSUBPDZ256rm,        0 },
+    { X86::VSUBPSZ128rr,      X86::VSUBPSZ128rm,        0 },
+    { X86::VSUBPSZ256rr,      X86::VSUBPSZ256rm,        0 },
+    { X86::VUNPCKHPDZ128rr,   X86::VUNPCKHPDZ128rm,     0 },
+    { X86::VUNPCKHPDZ256rr,   X86::VUNPCKHPDZ256rm,     0 },
+    { X86::VUNPCKHPSZ128rr,   X86::VUNPCKHPSZ128rm,     0 },
+    { X86::VUNPCKHPSZ256rr,   X86::VUNPCKHPSZ256rm,     0 },
+    { X86::VUNPCKLPDZ128rr,   X86::VUNPCKLPDZ128rm,     0 },
+    { X86::VUNPCKLPDZ256rr,   X86::VUNPCKLPDZ256rm,     0 },
+    { X86::VUNPCKLPSZ128rr,   X86::VUNPCKLPSZ128rm,     0 },
+    { X86::VUNPCKLPSZ256rr,   X86::VUNPCKLPSZ256rm,     0 },
+    { X86::VXORPDZ128rr,      X86::VXORPDZ128rm,        0 },
+    { X86::VXORPDZ256rr,      X86::VXORPDZ256rm,        0 },
+    { X86::VXORPSZ128rr,      X86::VXORPSZ128rm,        0 },
+    { X86::VXORPSZ256rr,      X86::VXORPSZ256rm,        0 },
+
+    // AVX-512 masked foldable instructions
+    { X86::VBROADCASTSSZrkz,  X86::VBROADCASTSSZmkz,    TB_NO_REVERSE },
+    { X86::VBROADCASTSDZrkz,  X86::VBROADCASTSDZmkz,    TB_NO_REVERSE },
+    { X86::VPABSBZrrkz,       X86::VPABSBZrmkz,         0 },
+    { X86::VPABSDZrrkz,       X86::VPABSDZrmkz,         0 },
+    { X86::VPABSQZrrkz,       X86::VPABSQZrmkz,         0 },
+    { X86::VPABSWZrrkz,       X86::VPABSWZrmkz,         0 },
+    { X86::VPERMILPDZrikz,    X86::VPERMILPDZmikz,      0 },
+    { X86::VPERMILPSZrikz,    X86::VPERMILPSZmikz,      0 },
+    { X86::VPERMPDZrikz,      X86::VPERMPDZmikz,        0 },
+    { X86::VPERMQZrikz,       X86::VPERMQZmikz,         0 },
+    { X86::VPMOVSXBDZrrkz,    X86::VPMOVSXBDZrmkz,      0 },
+    { X86::VPMOVSXBQZrrkz,    X86::VPMOVSXBQZrmkz,      TB_NO_REVERSE },
+    { X86::VPMOVSXBWZrrkz,    X86::VPMOVSXBWZrmkz,      0 },
+    { X86::VPMOVSXDQZrrkz,    X86::VPMOVSXDQZrmkz,      0 },
+    { X86::VPMOVSXWDZrrkz,    X86::VPMOVSXWDZrmkz,      0 },
+    { X86::VPMOVSXWQZrrkz,    X86::VPMOVSXWQZrmkz,      0 },
+    { X86::VPMOVZXBDZrrkz,    X86::VPMOVZXBDZrmkz,      0 },
+    { X86::VPMOVZXBQZrrkz,    X86::VPMOVZXBQZrmkz,      TB_NO_REVERSE },
+    { X86::VPMOVZXBWZrrkz,    X86::VPMOVZXBWZrmkz,      0 },
+    { X86::VPMOVZXDQZrrkz,    X86::VPMOVZXDQZrmkz,      0 },
+    { X86::VPMOVZXWDZrrkz,    X86::VPMOVZXWDZrmkz,      0 },
+    { X86::VPMOVZXWQZrrkz,    X86::VPMOVZXWQZrmkz,      0 },
+    { X86::VPOPCNTDZrrkz,     X86::VPOPCNTDZrmkz,       0 },
+    { X86::VPOPCNTQZrrkz,     X86::VPOPCNTQZrmkz,       0 },
+    { X86::VPSHUFDZrikz,      X86::VPSHUFDZmikz,        0 },
+    { X86::VPSHUFHWZrikz,     X86::VPSHUFHWZmikz,       0 },
+    { X86::VPSHUFLWZrikz,     X86::VPSHUFLWZmikz,       0 },
+    { X86::VPSLLDZrikz,       X86::VPSLLDZmikz,         0 },
+    { X86::VPSLLQZrikz,       X86::VPSLLQZmikz,         0 },
+    { X86::VPSLLWZrikz,       X86::VPSLLWZmikz,         0 },
+    { X86::VPSRADZrikz,       X86::VPSRADZmikz,         0 },
+    { X86::VPSRAQZrikz,       X86::VPSRAQZmikz,         0 },
+    { X86::VPSRAWZrikz,       X86::VPSRAWZmikz,         0 },
+    { X86::VPSRLDZrikz,       X86::VPSRLDZmikz,         0 },
+    { X86::VPSRLQZrikz,       X86::VPSRLQZmikz,         0 },
+    { X86::VPSRLWZrikz,       X86::VPSRLWZmikz,         0 },
+
+    // AVX-512VL 256-bit masked foldable instructions
+    { X86::VBROADCASTSDZ256rkz,  X86::VBROADCASTSDZ256mkz,      TB_NO_REVERSE },
+    { X86::VBROADCASTSSZ256rkz,  X86::VBROADCASTSSZ256mkz,      TB_NO_REVERSE },
+    { X86::VPABSBZ256rrkz,    X86::VPABSBZ256rmkz,      0 },
+    { X86::VPABSDZ256rrkz,    X86::VPABSDZ256rmkz,      0 },
+    { X86::VPABSQZ256rrkz,    X86::VPABSQZ256rmkz,      0 },
+    { X86::VPABSWZ256rrkz,    X86::VPABSWZ256rmkz,      0 },
+    { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz,   0 },
+    { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz,   0 },
+    { X86::VPERMPDZ256rikz,   X86::VPERMPDZ256mikz,     0 },
+    { X86::VPERMQZ256rikz,    X86::VPERMQZ256mikz,      0 },
+    { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz,   0 },
+    { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz,   0 },
+    { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz,   0 },
+    { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz,   0 },
+    { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz,   0 },
+    { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz,   0 },
+    { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz,   TB_NO_REVERSE },
+    { X86::VPSHUFDZ256rikz,   X86::VPSHUFDZ256mikz,     0 },
+    { X86::VPSHUFHWZ256rikz,  X86::VPSHUFHWZ256mikz,    0 },
+    { X86::VPSHUFLWZ256rikz,  X86::VPSHUFLWZ256mikz,    0 },
+    { X86::VPSLLDZ256rikz,    X86::VPSLLDZ256mikz,      0 },
+    { X86::VPSLLQZ256rikz,    X86::VPSLLQZ256mikz,      0 },
+    { X86::VPSLLWZ256rikz,    X86::VPSLLWZ256mikz,      0 },
+    { X86::VPSRADZ256rikz,    X86::VPSRADZ256mikz,      0 },
+    { X86::VPSRAQZ256rikz,    X86::VPSRAQZ256mikz,      0 },
+    { X86::VPSRAWZ256rikz,    X86::VPSRAWZ256mikz,      0 },
+    { X86::VPSRLDZ256rikz,    X86::VPSRLDZ256mikz,      0 },
+    { X86::VPSRLQZ256rikz,    X86::VPSRLQZ256mikz,      0 },
+    { X86::VPSRLWZ256rikz,    X86::VPSRLWZ256mikz,      0 },
+
+    // AVX-512VL 128-bit masked foldable instructions
+    { X86::VBROADCASTSSZ128rkz,  X86::VBROADCASTSSZ128mkz,      TB_NO_REVERSE },
+    { X86::VPABSBZ128rrkz,    X86::VPABSBZ128rmkz,      0 },
+    { X86::VPABSDZ128rrkz,    X86::VPABSDZ128rmkz,      0 },
+    { X86::VPABSQZ128rrkz,    X86::VPABSQZ128rmkz,      0 },
+    { X86::VPABSWZ128rrkz,    X86::VPABSWZ128rmkz,      0 },
+    { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz,   0 },
+    { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz,   0 },
+    { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPSHUFDZ128rikz,   X86::VPSHUFDZ128mikz,     0 },
+    { X86::VPSHUFHWZ128rikz,  X86::VPSHUFHWZ128mikz,    0 },
+    { X86::VPSHUFLWZ128rikz,  X86::VPSHUFLWZ128mikz,    0 },
+    { X86::VPSLLDZ128rikz,    X86::VPSLLDZ128mikz,      0 },
+    { X86::VPSLLQZ128rikz,    X86::VPSLLQZ128mikz,      0 },
+    { X86::VPSLLWZ128rikz,    X86::VPSLLWZ128mikz,      0 },
+    { X86::VPSRADZ128rikz,    X86::VPSRADZ128mikz,      0 },
+    { X86::VPSRAQZ128rikz,    X86::VPSRAQZ128mikz,      0 },
+    { X86::VPSRAWZ128rikz,    X86::VPSRAWZ128mikz,      0 },
+    { X86::VPSRLDZ128rikz,    X86::VPSRLDZ128mikz,      0 },
+    { X86::VPSRLQZ128rikz,    X86::VPSRLQZ128mikz,      0 },
+    { X86::VPSRLWZ128rikz,    X86::VPSRLWZ128mikz,      0 },
+
+    // AES foldable instructions
+    { X86::AESDECLASTrr,      X86::AESDECLASTrm,        TB_ALIGN_16 },
+    { X86::AESDECrr,          X86::AESDECrm,            TB_ALIGN_16 },
+    { X86::AESENCLASTrr,      X86::AESENCLASTrm,        TB_ALIGN_16 },
+    { X86::AESENCrr,          X86::AESENCrm,            TB_ALIGN_16 },
+    { X86::VAESDECLASTrr,     X86::VAESDECLASTrm,       0 },
+    { X86::VAESDECrr,         X86::VAESDECrm,           0 },
+    { X86::VAESENCLASTrr,     X86::VAESENCLASTrm,       0 },
+    { X86::VAESENCrr,         X86::VAESENCrm,           0 },
+
+    // SHA foldable instructions
+    { X86::SHA1MSG1rr,        X86::SHA1MSG1rm,          TB_ALIGN_16 },
+    { X86::SHA1MSG2rr,        X86::SHA1MSG2rm,          TB_ALIGN_16 },
+    { X86::SHA1NEXTErr,       X86::SHA1NEXTErm,         TB_ALIGN_16 },
+    { X86::SHA1RNDS4rri,      X86::SHA1RNDS4rmi,        TB_ALIGN_16 },
+    { X86::SHA256MSG1rr,      X86::SHA256MSG1rm,        TB_ALIGN_16 },
+    { X86::SHA256MSG2rr,      X86::SHA256MSG2rm,        TB_ALIGN_16 },
+    { X86::SHA256RNDS2rr,     X86::SHA256RNDS2rm,       TB_ALIGN_16 }
+  };
+
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
     AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
                   Entry.RegOp, Entry.MemOp,
@@ -150,12 +2439,1105 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
                   Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
   }
 
+  static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
+    // FMA4 foldable patterns
+    { X86::VFMADDSS4rr,           X86::VFMADDSS4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDSS4rr_Int,       X86::VFMADDSS4rm_Int,       TB_NO_REVERSE },
+    { X86::VFMADDSD4rr,           X86::VFMADDSD4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDSD4rr_Int,       X86::VFMADDSD4rm_Int,       TB_NO_REVERSE },
+    { X86::VFMADDPS4rr,           X86::VFMADDPS4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDPD4rr,           X86::VFMADDPD4rm,           TB_ALIGN_NONE },
+    { X86::VFMADDPS4Yrr,          X86::VFMADDPS4Yrm,          TB_ALIGN_NONE },
+    { X86::VFMADDPD4Yrr,          X86::VFMADDPD4Yrm,          TB_ALIGN_NONE },
+    { X86::VFNMADDSS4rr,          X86::VFNMADDSS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDSS4rr_Int,      X86::VFNMADDSS4rm_Int,      TB_NO_REVERSE },
+    { X86::VFNMADDSD4rr,          X86::VFNMADDSD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDSD4rr_Int,      X86::VFNMADDSD4rm_Int,      TB_NO_REVERSE },
+    { X86::VFNMADDPS4rr,          X86::VFNMADDPS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDPD4rr,          X86::VFNMADDPD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMADDPS4Yrr,         X86::VFNMADDPS4Yrm,         TB_ALIGN_NONE },
+    { X86::VFNMADDPD4Yrr,         X86::VFNMADDPD4Yrm,         TB_ALIGN_NONE },
+    { X86::VFMSUBSS4rr,           X86::VFMSUBSS4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBSS4rr_Int,       X86::VFMSUBSS4rm_Int,       TB_NO_REVERSE },
+    { X86::VFMSUBSD4rr,           X86::VFMSUBSD4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBSD4rr_Int,       X86::VFMSUBSD4rm_Int,       TB_NO_REVERSE },
+    { X86::VFMSUBPS4rr,           X86::VFMSUBPS4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBPD4rr,           X86::VFMSUBPD4rm,           TB_ALIGN_NONE },
+    { X86::VFMSUBPS4Yrr,          X86::VFMSUBPS4Yrm,          TB_ALIGN_NONE },
+    { X86::VFMSUBPD4Yrr,          X86::VFMSUBPD4Yrm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBSS4rr,          X86::VFNMSUBSS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBSS4rr_Int,      X86::VFNMSUBSS4rm_Int,      TB_NO_REVERSE },
+    { X86::VFNMSUBSD4rr,          X86::VFNMSUBSD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBSD4rr_Int,      X86::VFNMSUBSD4rm_Int,      TB_NO_REVERSE },
+    { X86::VFNMSUBPS4rr,          X86::VFNMSUBPS4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4rr,          X86::VFNMSUBPD4rm,          TB_ALIGN_NONE },
+    { X86::VFNMSUBPS4Yrr,         X86::VFNMSUBPS4Yrm,         TB_ALIGN_NONE },
+    { X86::VFNMSUBPD4Yrr,         X86::VFNMSUBPD4Yrm,         TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4rr,        X86::VFMADDSUBPS4rm,        TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4rr,        X86::VFMADDSUBPD4rm,        TB_ALIGN_NONE },
+    { X86::VFMADDSUBPS4Yrr,       X86::VFMADDSUBPS4Yrm,       TB_ALIGN_NONE },
+    { X86::VFMADDSUBPD4Yrr,       X86::VFMADDSUBPD4Yrm,       TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4rr,        X86::VFMSUBADDPS4rm,        TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4rr,        X86::VFMSUBADDPD4rm,        TB_ALIGN_NONE },
+    { X86::VFMSUBADDPS4Yrr,       X86::VFMSUBADDPS4Yrm,       TB_ALIGN_NONE },
+    { X86::VFMSUBADDPD4Yrr,       X86::VFMSUBADDPD4Yrm,       TB_ALIGN_NONE },
+
+    // XOP foldable instructions
+    { X86::VPCMOVrrr,             X86::VPCMOVrrm,             0 },
+    { X86::VPCMOVYrrr,            X86::VPCMOVYrrm,            0 },
+    { X86::VPERMIL2PDrr,          X86::VPERMIL2PDrm,          0 },
+    { X86::VPERMIL2PDYrr,         X86::VPERMIL2PDYrm,         0 },
+    { X86::VPERMIL2PSrr,          X86::VPERMIL2PSrm,          0 },
+    { X86::VPERMIL2PSYrr,         X86::VPERMIL2PSYrm,         0 },
+    { X86::VPPERMrrr,             X86::VPPERMrrm,             0 },
+
+    // AVX-512 instructions with 3 source operands.
+    { X86::VPERMI2Brr,            X86::VPERMI2Brm,            0 },
+    { X86::VPERMI2Drr,            X86::VPERMI2Drm,            0 },
+    { X86::VPERMI2PSrr,           X86::VPERMI2PSrm,           0 },
+    { X86::VPERMI2PDrr,           X86::VPERMI2PDrm,           0 },
+    { X86::VPERMI2Qrr,            X86::VPERMI2Qrm,            0 },
+    { X86::VPERMI2Wrr,            X86::VPERMI2Wrm,            0 },
+    { X86::VPERMT2Brr,            X86::VPERMT2Brm,            0 },
+    { X86::VPERMT2Drr,            X86::VPERMT2Drm,            0 },
+    { X86::VPERMT2PSrr,           X86::VPERMT2PSrm,           0 },
+    { X86::VPERMT2PDrr,           X86::VPERMT2PDrm,           0 },
+    { X86::VPERMT2Qrr,            X86::VPERMT2Qrm,            0 },
+    { X86::VPERMT2Wrr,            X86::VPERMT2Wrm,            0 },
+    { X86::VPTERNLOGDZrri,        X86::VPTERNLOGDZrmi,        0 },
+    { X86::VPTERNLOGQZrri,        X86::VPTERNLOGQZrmi,        0 },
+
+    // AVX-512VL 256-bit instructions with 3 source operands.
+    { X86::VPERMI2B256rr,         X86::VPERMI2B256rm,         0 },
+    { X86::VPERMI2D256rr,         X86::VPERMI2D256rm,         0 },
+    { X86::VPERMI2PD256rr,        X86::VPERMI2PD256rm,        0 },
+    { X86::VPERMI2PS256rr,        X86::VPERMI2PS256rm,        0 },
+    { X86::VPERMI2Q256rr,         X86::VPERMI2Q256rm,         0 },
+    { X86::VPERMI2W256rr,         X86::VPERMI2W256rm,         0 },
+    { X86::VPERMT2B256rr,         X86::VPERMT2B256rm,         0 },
+    { X86::VPERMT2D256rr,         X86::VPERMT2D256rm,         0 },
+    { X86::VPERMT2PD256rr,        X86::VPERMT2PD256rm,        0 },
+    { X86::VPERMT2PS256rr,        X86::VPERMT2PS256rm,        0 },
+    { X86::VPERMT2Q256rr,         X86::VPERMT2Q256rm,         0 },
+    { X86::VPERMT2W256rr,         X86::VPERMT2W256rm,         0 },
+    { X86::VPTERNLOGDZ256rri,     X86::VPTERNLOGDZ256rmi,     0 },
+    { X86::VPTERNLOGQZ256rri,     X86::VPTERNLOGQZ256rmi,     0 },
+
+    // AVX-512VL 128-bit instructions with 3 source operands.
+    { X86::VPERMI2B128rr,         X86::VPERMI2B128rm,         0 },
+    { X86::VPERMI2D128rr,         X86::VPERMI2D128rm,         0 },
+    { X86::VPERMI2PD128rr,        X86::VPERMI2PD128rm,        0 },
+    { X86::VPERMI2PS128rr,        X86::VPERMI2PS128rm,        0 },
+    { X86::VPERMI2Q128rr,         X86::VPERMI2Q128rm,         0 },
+    { X86::VPERMI2W128rr,         X86::VPERMI2W128rm,         0 },
+    { X86::VPERMT2B128rr,         X86::VPERMT2B128rm,         0 },
+    { X86::VPERMT2D128rr,         X86::VPERMT2D128rm,         0 },
+    { X86::VPERMT2PD128rr,        X86::VPERMT2PD128rm,        0 },
+    { X86::VPERMT2PS128rr,        X86::VPERMT2PS128rm,        0 },
+    { X86::VPERMT2Q128rr,         X86::VPERMT2Q128rm,         0 },
+    { X86::VPERMT2W128rr,         X86::VPERMT2W128rm,         0 },
+    { X86::VPTERNLOGDZ128rri,     X86::VPTERNLOGDZ128rmi,     0 },
+    { X86::VPTERNLOGQZ128rri,     X86::VPTERNLOGQZ128rmi,     0 },
+
+    // AVX-512 masked instructions
+    { X86::VADDPDZrrkz,           X86::VADDPDZrmkz,           0 },
+    { X86::VADDPSZrrkz,           X86::VADDPSZrmkz,           0 },
+    { X86::VADDSDZrr_Intkz,       X86::VADDSDZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VADDSSZrr_Intkz,       X86::VADDSSZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VALIGNDZrrikz,         X86::VALIGNDZrmikz,         0 },
+    { X86::VALIGNQZrrikz,         X86::VALIGNQZrmikz,         0 },
+    { X86::VANDNPDZrrkz,          X86::VANDNPDZrmkz,          0 },
+    { X86::VANDNPSZrrkz,          X86::VANDNPSZrmkz,          0 },
+    { X86::VANDPDZrrkz,           X86::VANDPDZrmkz,           0 },
+    { X86::VANDPSZrrkz,           X86::VANDPSZrmkz,           0 },
+    { X86::VDIVPDZrrkz,           X86::VDIVPDZrmkz,           0 },
+    { X86::VDIVPSZrrkz,           X86::VDIVPSZrmkz,           0 },
+    { X86::VDIVSDZrr_Intkz,       X86::VDIVSDZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VDIVSSZrr_Intkz,       X86::VDIVSSZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VINSERTF32x4Zrrkz,     X86::VINSERTF32x4Zrmkz,     0 },
+    { X86::VINSERTF32x8Zrrkz,     X86::VINSERTF32x8Zrmkz,     0 },
+    { X86::VINSERTF64x2Zrrkz,     X86::VINSERTF64x2Zrmkz,     0 },
+    { X86::VINSERTF64x4Zrrkz,     X86::VINSERTF64x4Zrmkz,     0 },
+    { X86::VINSERTI32x4Zrrkz,     X86::VINSERTI32x4Zrmkz,     0 },
+    { X86::VINSERTI32x8Zrrkz,     X86::VINSERTI32x8Zrmkz,     0 },
+    { X86::VINSERTI64x2Zrrkz,     X86::VINSERTI64x2Zrmkz,     0 },
+    { X86::VINSERTI64x4Zrrkz,     X86::VINSERTI64x4Zrmkz,     0 },
+    { X86::VMAXCPDZrrkz,          X86::VMAXCPDZrmkz,          0 },
+    { X86::VMAXCPSZrrkz,          X86::VMAXCPSZrmkz,          0 },
+    { X86::VMAXPDZrrkz,           X86::VMAXPDZrmkz,           0 },
+    { X86::VMAXPSZrrkz,           X86::VMAXPSZrmkz,           0 },
+    { X86::VMAXSDZrr_Intkz,       X86::VMAXSDZrm_Intkz,       0 },
+    { X86::VMAXSSZrr_Intkz,       X86::VMAXSSZrm_Intkz,       0 },
+    { X86::VMINCPDZrrkz,          X86::VMINCPDZrmkz,          0 },
+    { X86::VMINCPSZrrkz,          X86::VMINCPSZrmkz,          0 },
+    { X86::VMINPDZrrkz,           X86::VMINPDZrmkz,           0 },
+    { X86::VMINPSZrrkz,           X86::VMINPSZrmkz,           0 },
+    { X86::VMINSDZrr_Intkz,       X86::VMINSDZrm_Intkz,       0 },
+    { X86::VMINSSZrr_Intkz,       X86::VMINSSZrm_Intkz,       0 },
+    { X86::VMULPDZrrkz,           X86::VMULPDZrmkz,           0 },
+    { X86::VMULPSZrrkz,           X86::VMULPSZrmkz,           0 },
+    { X86::VMULSDZrr_Intkz,       X86::VMULSDZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VMULSSZrr_Intkz,       X86::VMULSSZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VORPDZrrkz,            X86::VORPDZrmkz,            0 },
+    { X86::VORPSZrrkz,            X86::VORPSZrmkz,            0 },
+    { X86::VPACKSSDWZrrkz,        X86::VPACKSSDWZrmkz,        0 },
+    { X86::VPACKSSWBZrrkz,        X86::VPACKSSWBZrmkz,        0 },
+    { X86::VPACKUSDWZrrkz,        X86::VPACKUSDWZrmkz,        0 },
+    { X86::VPACKUSWBZrrkz,        X86::VPACKUSWBZrmkz,        0 },
+    { X86::VPADDBZrrkz,           X86::VPADDBZrmkz,           0 },
+    { X86::VPADDDZrrkz,           X86::VPADDDZrmkz,           0 },
+    { X86::VPADDQZrrkz,           X86::VPADDQZrmkz,           0 },
+    { X86::VPADDSBZrrkz,          X86::VPADDSBZrmkz,          0 },
+    { X86::VPADDSWZrrkz,          X86::VPADDSWZrmkz,          0 },
+    { X86::VPADDUSBZrrkz,         X86::VPADDUSBZrmkz,         0 },
+    { X86::VPADDUSWZrrkz,         X86::VPADDUSWZrmkz,         0 },
+    { X86::VPADDWZrrkz,           X86::VPADDWZrmkz,           0 },
+    { X86::VPALIGNRZrrikz,        X86::VPALIGNRZrmikz,        0 },
+    { X86::VPANDDZrrkz,           X86::VPANDDZrmkz,           0 },
+    { X86::VPANDNDZrrkz,          X86::VPANDNDZrmkz,          0 },
+    { X86::VPANDNQZrrkz,          X86::VPANDNQZrmkz,          0 },
+    { X86::VPANDQZrrkz,           X86::VPANDQZrmkz,           0 },
+    { X86::VPAVGBZrrkz,           X86::VPAVGBZrmkz,           0 },
+    { X86::VPAVGWZrrkz,           X86::VPAVGWZrmkz,           0 },
+    { X86::VPERMBZrrkz,           X86::VPERMBZrmkz,           0 },
+    { X86::VPERMDZrrkz,           X86::VPERMDZrmkz,           0 },
+    { X86::VPERMILPDZrrkz,        X86::VPERMILPDZrmkz,        0 },
+    { X86::VPERMILPSZrrkz,        X86::VPERMILPSZrmkz,        0 },
+    { X86::VPERMPDZrrkz,          X86::VPERMPDZrmkz,          0 },
+    { X86::VPERMPSZrrkz,          X86::VPERMPSZrmkz,          0 },
+    { X86::VPERMQZrrkz,           X86::VPERMQZrmkz,           0 },
+    { X86::VPERMWZrrkz,           X86::VPERMWZrmkz,           0 },
+    { X86::VPMADDUBSWZrrkz,       X86::VPMADDUBSWZrmkz,       0 },
+    { X86::VPMADDWDZrrkz,         X86::VPMADDWDZrmkz,         0 },
+    { X86::VPMAXSBZrrkz,          X86::VPMAXSBZrmkz,          0 },
+    { X86::VPMAXSDZrrkz,          X86::VPMAXSDZrmkz,          0 },
+    { X86::VPMAXSQZrrkz,          X86::VPMAXSQZrmkz,          0 },
+    { X86::VPMAXSWZrrkz,          X86::VPMAXSWZrmkz,          0 },
+    { X86::VPMAXUBZrrkz,          X86::VPMAXUBZrmkz,          0 },
+    { X86::VPMAXUDZrrkz,          X86::VPMAXUDZrmkz,          0 },
+    { X86::VPMAXUQZrrkz,          X86::VPMAXUQZrmkz,          0 },
+    { X86::VPMAXUWZrrkz,          X86::VPMAXUWZrmkz,          0 },
+    { X86::VPMINSBZrrkz,          X86::VPMINSBZrmkz,          0 },
+    { X86::VPMINSDZrrkz,          X86::VPMINSDZrmkz,          0 },
+    { X86::VPMINSQZrrkz,          X86::VPMINSQZrmkz,          0 },
+    { X86::VPMINSWZrrkz,          X86::VPMINSWZrmkz,          0 },
+    { X86::VPMINUBZrrkz,          X86::VPMINUBZrmkz,          0 },
+    { X86::VPMINUDZrrkz,          X86::VPMINUDZrmkz,          0 },
+    { X86::VPMINUQZrrkz,          X86::VPMINUQZrmkz,          0 },
+    { X86::VPMINUWZrrkz,          X86::VPMINUWZrmkz,          0 },
+    { X86::VPMULLDZrrkz,          X86::VPMULLDZrmkz,          0 },
+    { X86::VPMULLQZrrkz,          X86::VPMULLQZrmkz,          0 },
+    { X86::VPMULLWZrrkz,          X86::VPMULLWZrmkz,          0 },
+    { X86::VPMULDQZrrkz,          X86::VPMULDQZrmkz,          0 },
+    { X86::VPMULUDQZrrkz,         X86::VPMULUDQZrmkz,         0 },
+    { X86::VPORDZrrkz,            X86::VPORDZrmkz,            0 },
+    { X86::VPORQZrrkz,            X86::VPORQZrmkz,            0 },
+    { X86::VPSHUFBZrrkz,          X86::VPSHUFBZrmkz,          0 },
+    { X86::VPSLLDZrrkz,           X86::VPSLLDZrmkz,           0 },
+    { X86::VPSLLQZrrkz,           X86::VPSLLQZrmkz,           0 },
+    { X86::VPSLLVDZrrkz,          X86::VPSLLVDZrmkz,          0 },
+    { X86::VPSLLVQZrrkz,          X86::VPSLLVQZrmkz,          0 },
+    { X86::VPSLLVWZrrkz,          X86::VPSLLVWZrmkz,          0 },
+    { X86::VPSLLWZrrkz,           X86::VPSLLWZrmkz,           0 },
+    { X86::VPSRADZrrkz,           X86::VPSRADZrmkz,           0 },
+    { X86::VPSRAQZrrkz,           X86::VPSRAQZrmkz,           0 },
+    { X86::VPSRAVDZrrkz,          X86::VPSRAVDZrmkz,          0 },
+    { X86::VPSRAVQZrrkz,          X86::VPSRAVQZrmkz,          0 },
+    { X86::VPSRAVWZrrkz,          X86::VPSRAVWZrmkz,          0 },
+    { X86::VPSRAWZrrkz,           X86::VPSRAWZrmkz,           0 },
+    { X86::VPSRLDZrrkz,           X86::VPSRLDZrmkz,           0 },
+    { X86::VPSRLQZrrkz,           X86::VPSRLQZrmkz,           0 },
+    { X86::VPSRLVDZrrkz,          X86::VPSRLVDZrmkz,          0 },
+    { X86::VPSRLVQZrrkz,          X86::VPSRLVQZrmkz,          0 },
+    { X86::VPSRLVWZrrkz,          X86::VPSRLVWZrmkz,          0 },
+    { X86::VPSRLWZrrkz,           X86::VPSRLWZrmkz,           0 },
+    { X86::VPSUBBZrrkz,           X86::VPSUBBZrmkz,           0 },
+    { X86::VPSUBDZrrkz,           X86::VPSUBDZrmkz,           0 },
+    { X86::VPSUBQZrrkz,           X86::VPSUBQZrmkz,           0 },
+    { X86::VPSUBSBZrrkz,          X86::VPSUBSBZrmkz,          0 },
+    { X86::VPSUBSWZrrkz,          X86::VPSUBSWZrmkz,          0 },
+    { X86::VPSUBUSBZrrkz,         X86::VPSUBUSBZrmkz,         0 },
+    { X86::VPSUBUSWZrrkz,         X86::VPSUBUSWZrmkz,         0 },
+    { X86::VPSUBWZrrkz,           X86::VPSUBWZrmkz,           0 },
+    { X86::VPUNPCKHBWZrrkz,       X86::VPUNPCKHBWZrmkz,       0 },
+    { X86::VPUNPCKHDQZrrkz,       X86::VPUNPCKHDQZrmkz,       0 },
+    { X86::VPUNPCKHQDQZrrkz,      X86::VPUNPCKHQDQZrmkz,      0 },
+    { X86::VPUNPCKHWDZrrkz,       X86::VPUNPCKHWDZrmkz,       0 },
+    { X86::VPUNPCKLBWZrrkz,       X86::VPUNPCKLBWZrmkz,       0 },
+    { X86::VPUNPCKLDQZrrkz,       X86::VPUNPCKLDQZrmkz,       0 },
+    { X86::VPUNPCKLQDQZrrkz,      X86::VPUNPCKLQDQZrmkz,      0 },
+    { X86::VPUNPCKLWDZrrkz,       X86::VPUNPCKLWDZrmkz,       0 },
+    { X86::VPXORDZrrkz,           X86::VPXORDZrmkz,           0 },
+    { X86::VPXORQZrrkz,           X86::VPXORQZrmkz,           0 },
+    { X86::VSHUFPDZrrikz,         X86::VSHUFPDZrmikz,         0 },
+    { X86::VSHUFPSZrrikz,         X86::VSHUFPSZrmikz,         0 },
+    { X86::VSUBPDZrrkz,           X86::VSUBPDZrmkz,           0 },
+    { X86::VSUBPSZrrkz,           X86::VSUBPSZrmkz,           0 },
+    { X86::VSUBSDZrr_Intkz,       X86::VSUBSDZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VSUBSSZrr_Intkz,       X86::VSUBSSZrm_Intkz,       TB_NO_REVERSE },
+    { X86::VUNPCKHPDZrrkz,        X86::VUNPCKHPDZrmkz,        0 },
+    { X86::VUNPCKHPSZrrkz,        X86::VUNPCKHPSZrmkz,        0 },
+    { X86::VUNPCKLPDZrrkz,        X86::VUNPCKLPDZrmkz,        0 },
+    { X86::VUNPCKLPSZrrkz,        X86::VUNPCKLPSZrmkz,        0 },
+    { X86::VXORPDZrrkz,           X86::VXORPDZrmkz,           0 },
+    { X86::VXORPSZrrkz,           X86::VXORPSZrmkz,           0 },
+
+    // AVX-512{F,VL} masked arithmetic instructions 256-bit
+    { X86::VADDPDZ256rrkz,        X86::VADDPDZ256rmkz,        0 },
+    { X86::VADDPSZ256rrkz,        X86::VADDPSZ256rmkz,        0 },
+    { X86::VALIGNDZ256rrikz,      X86::VALIGNDZ256rmikz,      0 },
+    { X86::VALIGNQZ256rrikz,      X86::VALIGNQZ256rmikz,      0 },
+    { X86::VANDNPDZ256rrkz,       X86::VANDNPDZ256rmkz,       0 },
+    { X86::VANDNPSZ256rrkz,       X86::VANDNPSZ256rmkz,       0 },
+    { X86::VANDPDZ256rrkz,        X86::VANDPDZ256rmkz,        0 },
+    { X86::VANDPSZ256rrkz,        X86::VANDPSZ256rmkz,        0 },
+    { X86::VDIVPDZ256rrkz,        X86::VDIVPDZ256rmkz,        0 },
+    { X86::VDIVPSZ256rrkz,        X86::VDIVPSZ256rmkz,        0 },
+    { X86::VINSERTF32x4Z256rrkz,  X86::VINSERTF32x4Z256rmkz,  0 },
+    { X86::VINSERTF64x2Z256rrkz,  X86::VINSERTF64x2Z256rmkz,  0 },
+    { X86::VINSERTI32x4Z256rrkz,  X86::VINSERTI32x4Z256rmkz,  0 },
+    { X86::VINSERTI64x2Z256rrkz,  X86::VINSERTI64x2Z256rmkz,  0 },
+    { X86::VMAXCPDZ256rrkz,       X86::VMAXCPDZ256rmkz,       0 },
+    { X86::VMAXCPSZ256rrkz,       X86::VMAXCPSZ256rmkz,       0 },
+    { X86::VMAXPDZ256rrkz,        X86::VMAXPDZ256rmkz,        0 },
+    { X86::VMAXPSZ256rrkz,        X86::VMAXPSZ256rmkz,        0 },
+    { X86::VMINCPDZ256rrkz,       X86::VMINCPDZ256rmkz,       0 },
+    { X86::VMINCPSZ256rrkz,       X86::VMINCPSZ256rmkz,       0 },
+    { X86::VMINPDZ256rrkz,        X86::VMINPDZ256rmkz,        0 },
+    { X86::VMINPSZ256rrkz,        X86::VMINPSZ256rmkz,        0 },
+    { X86::VMULPDZ256rrkz,        X86::VMULPDZ256rmkz,        0 },
+    { X86::VMULPSZ256rrkz,        X86::VMULPSZ256rmkz,        0 },
+    { X86::VORPDZ256rrkz,         X86::VORPDZ256rmkz,         0 },
+    { X86::VORPSZ256rrkz,         X86::VORPSZ256rmkz,         0 },
+    { X86::VPACKSSDWZ256rrkz,     X86::VPACKSSDWZ256rmkz,     0 },
+    { X86::VPACKSSWBZ256rrkz,     X86::VPACKSSWBZ256rmkz,     0 },
+    { X86::VPACKUSDWZ256rrkz,     X86::VPACKUSDWZ256rmkz,     0 },
+    { X86::VPACKUSWBZ256rrkz,     X86::VPACKUSWBZ256rmkz,     0 },
+    { X86::VPADDBZ256rrkz,        X86::VPADDBZ256rmkz,        0 },
+    { X86::VPADDDZ256rrkz,        X86::VPADDDZ256rmkz,        0 },
+    { X86::VPADDQZ256rrkz,        X86::VPADDQZ256rmkz,        0 },
+    { X86::VPADDSBZ256rrkz,       X86::VPADDSBZ256rmkz,       0 },
+    { X86::VPADDSWZ256rrkz,       X86::VPADDSWZ256rmkz,       0 },
+    { X86::VPADDUSBZ256rrkz,      X86::VPADDUSBZ256rmkz,      0 },
+    { X86::VPADDUSWZ256rrkz,      X86::VPADDUSWZ256rmkz,      0 },
+    { X86::VPADDWZ256rrkz,        X86::VPADDWZ256rmkz,        0 },
+    { X86::VPALIGNRZ256rrikz,     X86::VPALIGNRZ256rmikz,     0 },
+    { X86::VPANDDZ256rrkz,        X86::VPANDDZ256rmkz,        0 },
+    { X86::VPANDNDZ256rrkz,       X86::VPANDNDZ256rmkz,       0 },
+    { X86::VPANDNQZ256rrkz,       X86::VPANDNQZ256rmkz,       0 },
+    { X86::VPANDQZ256rrkz,        X86::VPANDQZ256rmkz,        0 },
+    { X86::VPAVGBZ256rrkz,        X86::VPAVGBZ256rmkz,        0 },
+    { X86::VPAVGWZ256rrkz,        X86::VPAVGWZ256rmkz,        0 },
+    { X86::VPERMBZ256rrkz,        X86::VPERMBZ256rmkz,        0 },
+    { X86::VPERMDZ256rrkz,        X86::VPERMDZ256rmkz,        0 },
+    { X86::VPERMILPDZ256rrkz,     X86::VPERMILPDZ256rmkz,     0 },
+    { X86::VPERMILPSZ256rrkz,     X86::VPERMILPSZ256rmkz,     0 },
+    { X86::VPERMPDZ256rrkz,       X86::VPERMPDZ256rmkz,       0 },
+    { X86::VPERMPSZ256rrkz,       X86::VPERMPSZ256rmkz,       0 },
+    { X86::VPERMQZ256rrkz,        X86::VPERMQZ256rmkz,        0 },
+    { X86::VPERMWZ256rrkz,        X86::VPERMWZ256rmkz,        0 },
+    { X86::VPMADDUBSWZ256rrkz,    X86::VPMADDUBSWZ256rmkz,    0 },
+    { X86::VPMADDWDZ256rrkz,      X86::VPMADDWDZ256rmkz,      0 },
+    { X86::VPMAXSBZ256rrkz,       X86::VPMAXSBZ256rmkz,       0 },
+    { X86::VPMAXSDZ256rrkz,       X86::VPMAXSDZ256rmkz,       0 },
+    { X86::VPMAXSQZ256rrkz,       X86::VPMAXSQZ256rmkz,       0 },
+    { X86::VPMAXSWZ256rrkz,       X86::VPMAXSWZ256rmkz,       0 },
+    { X86::VPMAXUBZ256rrkz,       X86::VPMAXUBZ256rmkz,       0 },
+    { X86::VPMAXUDZ256rrkz,       X86::VPMAXUDZ256rmkz,       0 },
+    { X86::VPMAXUQZ256rrkz,       X86::VPMAXUQZ256rmkz,       0 },
+    { X86::VPMAXUWZ256rrkz,       X86::VPMAXUWZ256rmkz,       0 },
+    { X86::VPMINSBZ256rrkz,       X86::VPMINSBZ256rmkz,       0 },
+    { X86::VPMINSDZ256rrkz,       X86::VPMINSDZ256rmkz,       0 },
+    { X86::VPMINSQZ256rrkz,       X86::VPMINSQZ256rmkz,       0 },
+    { X86::VPMINSWZ256rrkz,       X86::VPMINSWZ256rmkz,       0 },
+    { X86::VPMINUBZ256rrkz,       X86::VPMINUBZ256rmkz,       0 },
+    { X86::VPMINUDZ256rrkz,       X86::VPMINUDZ256rmkz,       0 },
+    { X86::VPMINUQZ256rrkz,       X86::VPMINUQZ256rmkz,       0 },
+    { X86::VPMINUWZ256rrkz,       X86::VPMINUWZ256rmkz,       0 },
+    { X86::VPMULDQZ256rrkz,       X86::VPMULDQZ256rmkz,       0 },
+    { X86::VPMULLDZ256rrkz,       X86::VPMULLDZ256rmkz,       0 },
+    { X86::VPMULLQZ256rrkz,       X86::VPMULLQZ256rmkz,       0 },
+    { X86::VPMULLWZ256rrkz,       X86::VPMULLWZ256rmkz,       0 },
+    { X86::VPMULUDQZ256rrkz,      X86::VPMULUDQZ256rmkz,      0 },
+    { X86::VPORDZ256rrkz,         X86::VPORDZ256rmkz,         0 },
+    { X86::VPORQZ256rrkz,         X86::VPORQZ256rmkz,         0 },
+    { X86::VPSHUFBZ256rrkz,       X86::VPSHUFBZ256rmkz,       0 },
+    { X86::VPSLLDZ256rrkz,        X86::VPSLLDZ256rmkz,        0 },
+    { X86::VPSLLQZ256rrkz,        X86::VPSLLQZ256rmkz,        0 },
+    { X86::VPSLLVDZ256rrkz,       X86::VPSLLVDZ256rmkz,       0 },
+    { X86::VPSLLVQZ256rrkz,       X86::VPSLLVQZ256rmkz,       0 },
+    { X86::VPSLLVWZ256rrkz,       X86::VPSLLVWZ256rmkz,       0 },
+    { X86::VPSLLWZ256rrkz,        X86::VPSLLWZ256rmkz,        0 },
+    { X86::VPSRADZ256rrkz,        X86::VPSRADZ256rmkz,        0 },
+    { X86::VPSRAQZ256rrkz,        X86::VPSRAQZ256rmkz,        0 },
+    { X86::VPSRAVDZ256rrkz,       X86::VPSRAVDZ256rmkz,       0 },
+    { X86::VPSRAVQZ256rrkz,       X86::VPSRAVQZ256rmkz,       0 },
+    { X86::VPSRAVWZ256rrkz,       X86::VPSRAVWZ256rmkz,       0 },
+    { X86::VPSRAWZ256rrkz,        X86::VPSRAWZ256rmkz,        0 },
+    { X86::VPSRLDZ256rrkz,        X86::VPSRLDZ256rmkz,        0 },
+    { X86::VPSRLQZ256rrkz,        X86::VPSRLQZ256rmkz,        0 },
+    { X86::VPSRLVDZ256rrkz,       X86::VPSRLVDZ256rmkz,       0 },
+    { X86::VPSRLVQZ256rrkz,       X86::VPSRLVQZ256rmkz,       0 },
+    { X86::VPSRLVWZ256rrkz,       X86::VPSRLVWZ256rmkz,       0 },
+    { X86::VPSRLWZ256rrkz,        X86::VPSRLWZ256rmkz,        0 },
+    { X86::VPSUBBZ256rrkz,        X86::VPSUBBZ256rmkz,        0 },
+    { X86::VPSUBDZ256rrkz,        X86::VPSUBDZ256rmkz,        0 },
+    { X86::VPSUBQZ256rrkz,        X86::VPSUBQZ256rmkz,        0 },
+    { X86::VPSUBSBZ256rrkz,       X86::VPSUBSBZ256rmkz,       0 },
+    { X86::VPSUBSWZ256rrkz,       X86::VPSUBSWZ256rmkz,       0 },
+    { X86::VPSUBUSBZ256rrkz,      X86::VPSUBUSBZ256rmkz,      0 },
+    { X86::VPSUBUSWZ256rrkz,      X86::VPSUBUSWZ256rmkz,      0 },
+    { X86::VPSUBWZ256rrkz,        X86::VPSUBWZ256rmkz,        0 },
+    { X86::VPUNPCKHBWZ256rrkz,    X86::VPUNPCKHBWZ256rmkz,    0 },
+    { X86::VPUNPCKHDQZ256rrkz,    X86::VPUNPCKHDQZ256rmkz,    0 },
+    { X86::VPUNPCKHQDQZ256rrkz,   X86::VPUNPCKHQDQZ256rmkz,   0 },
+    { X86::VPUNPCKHWDZ256rrkz,    X86::VPUNPCKHWDZ256rmkz,    0 },
+    { X86::VPUNPCKLBWZ256rrkz,    X86::VPUNPCKLBWZ256rmkz,    0 },
+    { X86::VPUNPCKLDQZ256rrkz,    X86::VPUNPCKLDQZ256rmkz,    0 },
+    { X86::VPUNPCKLQDQZ256rrkz,   X86::VPUNPCKLQDQZ256rmkz,   0 },
+    { X86::VPUNPCKLWDZ256rrkz,    X86::VPUNPCKLWDZ256rmkz,    0 },
+    { X86::VPXORDZ256rrkz,        X86::VPXORDZ256rmkz,        0 },
+    { X86::VPXORQZ256rrkz,        X86::VPXORQZ256rmkz,        0 },
+    { X86::VSHUFPDZ256rrikz,      X86::VSHUFPDZ256rmikz,      0 },
+    { X86::VSHUFPSZ256rrikz,      X86::VSHUFPSZ256rmikz,      0 },
+    { X86::VSUBPDZ256rrkz,        X86::VSUBPDZ256rmkz,        0 },
+    { X86::VSUBPSZ256rrkz,        X86::VSUBPSZ256rmkz,        0 },
+    { X86::VUNPCKHPDZ256rrkz,     X86::VUNPCKHPDZ256rmkz,     0 },
+    { X86::VUNPCKHPSZ256rrkz,     X86::VUNPCKHPSZ256rmkz,     0 },
+    { X86::VUNPCKLPDZ256rrkz,     X86::VUNPCKLPDZ256rmkz,     0 },
+    { X86::VUNPCKLPSZ256rrkz,     X86::VUNPCKLPSZ256rmkz,     0 },
+    { X86::VXORPDZ256rrkz,        X86::VXORPDZ256rmkz,        0 },
+    { X86::VXORPSZ256rrkz,        X86::VXORPSZ256rmkz,        0 },
+
+    // AVX-512{F,VL} masked arithmetic instructions 128-bit
+    { X86::VADDPDZ128rrkz,        X86::VADDPDZ128rmkz,        0 },
+    { X86::VADDPSZ128rrkz,        X86::VADDPSZ128rmkz,        0 },
+    { X86::VALIGNDZ128rrikz,      X86::VALIGNDZ128rmikz,      0 },
+    { X86::VALIGNQZ128rrikz,      X86::VALIGNQZ128rmikz,      0 },
+    { X86::VANDNPDZ128rrkz,       X86::VANDNPDZ128rmkz,       0 },
+    { X86::VANDNPSZ128rrkz,       X86::VANDNPSZ128rmkz,       0 },
+    { X86::VANDPDZ128rrkz,        X86::VANDPDZ128rmkz,        0 },
+    { X86::VANDPSZ128rrkz,        X86::VANDPSZ128rmkz,        0 },
+    { X86::VDIVPDZ128rrkz,        X86::VDIVPDZ128rmkz,        0 },
+    { X86::VDIVPSZ128rrkz,        X86::VDIVPSZ128rmkz,        0 },
+    { X86::VMAXCPDZ128rrkz,       X86::VMAXCPDZ128rmkz,       0 },
+    { X86::VMAXCPSZ128rrkz,       X86::VMAXCPSZ128rmkz,       0 },
+    { X86::VMAXPDZ128rrkz,        X86::VMAXPDZ128rmkz,        0 },
+    { X86::VMAXPSZ128rrkz,        X86::VMAXPSZ128rmkz,        0 },
+    { X86::VMINCPDZ128rrkz,       X86::VMINCPDZ128rmkz,       0 },
+    { X86::VMINCPSZ128rrkz,       X86::VMINCPSZ128rmkz,       0 },
+    { X86::VMINPDZ128rrkz,        X86::VMINPDZ128rmkz,        0 },
+    { X86::VMINPSZ128rrkz,        X86::VMINPSZ128rmkz,        0 },
+    { X86::VMULPDZ128rrkz,        X86::VMULPDZ128rmkz,        0 },
+    { X86::VMULPSZ128rrkz,        X86::VMULPSZ128rmkz,        0 },
+    { X86::VORPDZ128rrkz,         X86::VORPDZ128rmkz,         0 },
+    { X86::VORPSZ128rrkz,         X86::VORPSZ128rmkz,         0 },
+    { X86::VPACKSSDWZ128rrkz,     X86::VPACKSSDWZ128rmkz,     0 },
+    { X86::VPACKSSWBZ128rrkz,     X86::VPACKSSWBZ128rmkz,     0 },
+    { X86::VPACKUSDWZ128rrkz,     X86::VPACKUSDWZ128rmkz,     0 },
+    { X86::VPACKUSWBZ128rrkz,     X86::VPACKUSWBZ128rmkz,     0 },
+    { X86::VPADDBZ128rrkz,        X86::VPADDBZ128rmkz,        0 },
+    { X86::VPADDDZ128rrkz,        X86::VPADDDZ128rmkz,        0 },
+    { X86::VPADDQZ128rrkz,        X86::VPADDQZ128rmkz,        0 },
+    { X86::VPADDSBZ128rrkz,       X86::VPADDSBZ128rmkz,       0 },
+    { X86::VPADDSWZ128rrkz,       X86::VPADDSWZ128rmkz,       0 },
+    { X86::VPADDUSBZ128rrkz,      X86::VPADDUSBZ128rmkz,      0 },
+    { X86::VPADDUSWZ128rrkz,      X86::VPADDUSWZ128rmkz,      0 },
+    { X86::VPADDWZ128rrkz,        X86::VPADDWZ128rmkz,        0 },
+    { X86::VPALIGNRZ128rrikz,     X86::VPALIGNRZ128rmikz,     0 },
+    { X86::VPANDDZ128rrkz,        X86::VPANDDZ128rmkz,        0 },
+    { X86::VPANDNDZ128rrkz,       X86::VPANDNDZ128rmkz,       0 },
+    { X86::VPANDNQZ128rrkz,       X86::VPANDNQZ128rmkz,       0 },
+    { X86::VPANDQZ128rrkz,        X86::VPANDQZ128rmkz,        0 },
+    { X86::VPAVGBZ128rrkz,        X86::VPAVGBZ128rmkz,        0 },
+    { X86::VPAVGWZ128rrkz,        X86::VPAVGWZ128rmkz,        0 },
+    { X86::VPERMBZ128rrkz,        X86::VPERMBZ128rmkz,        0 },
+    { X86::VPERMILPDZ128rrkz,     X86::VPERMILPDZ128rmkz,     0 },
+    { X86::VPERMILPSZ128rrkz,     X86::VPERMILPSZ128rmkz,     0 },
+    { X86::VPERMWZ128rrkz,        X86::VPERMWZ128rmkz,        0 },
+    { X86::VPMADDUBSWZ128rrkz,    X86::VPMADDUBSWZ128rmkz,    0 },
+    { X86::VPMADDWDZ128rrkz,      X86::VPMADDWDZ128rmkz,      0 },
+    { X86::VPMAXSBZ128rrkz,       X86::VPMAXSBZ128rmkz,       0 },
+    { X86::VPMAXSDZ128rrkz,       X86::VPMAXSDZ128rmkz,       0 },
+    { X86::VPMAXSQZ128rrkz,       X86::VPMAXSQZ128rmkz,       0 },
+    { X86::VPMAXSWZ128rrkz,       X86::VPMAXSWZ128rmkz,       0 },
+    { X86::VPMAXUBZ128rrkz,       X86::VPMAXUBZ128rmkz,       0 },
+    { X86::VPMAXUDZ128rrkz,       X86::VPMAXUDZ128rmkz,       0 },
+    { X86::VPMAXUQZ128rrkz,       X86::VPMAXUQZ128rmkz,       0 },
+    { X86::VPMAXUWZ128rrkz,       X86::VPMAXUWZ128rmkz,       0 },
+    { X86::VPMINSBZ128rrkz,       X86::VPMINSBZ128rmkz,       0 },
+    { X86::VPMINSDZ128rrkz,       X86::VPMINSDZ128rmkz,       0 },
+    { X86::VPMINSQZ128rrkz,       X86::VPMINSQZ128rmkz,       0 },
+    { X86::VPMINSWZ128rrkz,       X86::VPMINSWZ128rmkz,       0 },
+    { X86::VPMINUBZ128rrkz,       X86::VPMINUBZ128rmkz,       0 },
+    { X86::VPMINUDZ128rrkz,       X86::VPMINUDZ128rmkz,       0 },
+    { X86::VPMINUQZ128rrkz,       X86::VPMINUQZ128rmkz,       0 },
+    { X86::VPMINUWZ128rrkz,       X86::VPMINUWZ128rmkz,       0 },
+    { X86::VPMULDQZ128rrkz,       X86::VPMULDQZ128rmkz,       0 },
+    { X86::VPMULLDZ128rrkz,       X86::VPMULLDZ128rmkz,       0 },
+    { X86::VPMULLQZ128rrkz,       X86::VPMULLQZ128rmkz,       0 },
+    { X86::VPMULLWZ128rrkz,       X86::VPMULLWZ128rmkz,       0 },
+    { X86::VPMULUDQZ128rrkz,      X86::VPMULUDQZ128rmkz,      0 },
+    { X86::VPORDZ128rrkz,         X86::VPORDZ128rmkz,         0 },
+    { X86::VPORQZ128rrkz,         X86::VPORQZ128rmkz,         0 },
+    { X86::VPSHUFBZ128rrkz,       X86::VPSHUFBZ128rmkz,       0 },
+    { X86::VPSLLDZ128rrkz,        X86::VPSLLDZ128rmkz,        0 },
+    { X86::VPSLLQZ128rrkz,        X86::VPSLLQZ128rmkz,        0 },
+    { X86::VPSLLVDZ128rrkz,       X86::VPSLLVDZ128rmkz,       0 },
+    { X86::VPSLLVQZ128rrkz,       X86::VPSLLVQZ128rmkz,       0 },
+    { X86::VPSLLVWZ128rrkz,       X86::VPSLLVWZ128rmkz,       0 },
+    { X86::VPSLLWZ128rrkz,        X86::VPSLLWZ128rmkz,        0 },
+    { X86::VPSRADZ128rrkz,        X86::VPSRADZ128rmkz,        0 },
+    { X86::VPSRAQZ128rrkz,        X86::VPSRAQZ128rmkz,        0 },
+    { X86::VPSRAVDZ128rrkz,       X86::VPSRAVDZ128rmkz,       0 },
+    { X86::VPSRAVQZ128rrkz,       X86::VPSRAVQZ128rmkz,       0 },
+    { X86::VPSRAVWZ128rrkz,       X86::VPSRAVWZ128rmkz,       0 },
+    { X86::VPSRAWZ128rrkz,        X86::VPSRAWZ128rmkz,        0 },
+    { X86::VPSRLDZ128rrkz,        X86::VPSRLDZ128rmkz,        0 },
+    { X86::VPSRLQZ128rrkz,        X86::VPSRLQZ128rmkz,        0 },
+    { X86::VPSRLVDZ128rrkz,       X86::VPSRLVDZ128rmkz,       0 },
+    { X86::VPSRLVQZ128rrkz,       X86::VPSRLVQZ128rmkz,       0 },
+    { X86::VPSRLVWZ128rrkz,       X86::VPSRLVWZ128rmkz,       0 },
+    { X86::VPSRLWZ128rrkz,        X86::VPSRLWZ128rmkz,        0 },
+    { X86::VPSUBBZ128rrkz,        X86::VPSUBBZ128rmkz,        0 },
+    { X86::VPSUBDZ128rrkz,        X86::VPSUBDZ128rmkz,        0 },
+    { X86::VPSUBQZ128rrkz,        X86::VPSUBQZ128rmkz,        0 },
+    { X86::VPSUBSBZ128rrkz,       X86::VPSUBSBZ128rmkz,       0 },
+    { X86::VPSUBSWZ128rrkz,       X86::VPSUBSWZ128rmkz,       0 },
+    { X86::VPSUBUSBZ128rrkz,      X86::VPSUBUSBZ128rmkz,      0 },
+    { X86::VPSUBUSWZ128rrkz,      X86::VPSUBUSWZ128rmkz,      0 },
+    { X86::VPSUBWZ128rrkz,        X86::VPSUBWZ128rmkz,        0 },
+    { X86::VPUNPCKHBWZ128rrkz,    X86::VPUNPCKHBWZ128rmkz,    0 },
+    { X86::VPUNPCKHDQZ128rrkz,    X86::VPUNPCKHDQZ128rmkz,    0 },
+    { X86::VPUNPCKHQDQZ128rrkz,   X86::VPUNPCKHQDQZ128rmkz,   0 },
+    { X86::VPUNPCKHWDZ128rrkz,    X86::VPUNPCKHWDZ128rmkz,    0 },
+    { X86::VPUNPCKLBWZ128rrkz,    X86::VPUNPCKLBWZ128rmkz,    0 },
+    { X86::VPUNPCKLDQZ128rrkz,    X86::VPUNPCKLDQZ128rmkz,    0 },
+    { X86::VPUNPCKLQDQZ128rrkz,   X86::VPUNPCKLQDQZ128rmkz,   0 },
+    { X86::VPUNPCKLWDZ128rrkz,    X86::VPUNPCKLWDZ128rmkz,    0 },
+    { X86::VPXORDZ128rrkz,        X86::VPXORDZ128rmkz,        0 },
+    { X86::VPXORQZ128rrkz,        X86::VPXORQZ128rmkz,        0 },
+    { X86::VSHUFPDZ128rrikz,      X86::VSHUFPDZ128rmikz,      0 },
+    { X86::VSHUFPSZ128rrikz,      X86::VSHUFPSZ128rmikz,      0 },
+    { X86::VSUBPDZ128rrkz,        X86::VSUBPDZ128rmkz,        0 },
+    { X86::VSUBPSZ128rrkz,        X86::VSUBPSZ128rmkz,        0 },
+    { X86::VUNPCKHPDZ128rrkz,     X86::VUNPCKHPDZ128rmkz,     0 },
+    { X86::VUNPCKHPSZ128rrkz,     X86::VUNPCKHPSZ128rmkz,     0 },
+    { X86::VUNPCKLPDZ128rrkz,     X86::VUNPCKLPDZ128rmkz,     0 },
+    { X86::VUNPCKLPSZ128rrkz,     X86::VUNPCKLPSZ128rmkz,     0 },
+    { X86::VXORPDZ128rrkz,        X86::VXORPDZ128rmkz,        0 },
+    { X86::VXORPSZ128rrkz,        X86::VXORPSZ128rmkz,        0 },
+
+    // AVX-512 masked foldable instructions
+    { X86::VBROADCASTSSZrk,       X86::VBROADCASTSSZmk,       TB_NO_REVERSE },
+    { X86::VBROADCASTSDZrk,       X86::VBROADCASTSDZmk,       TB_NO_REVERSE },
+    { X86::VPABSBZrrk,            X86::VPABSBZrmk,            0 },
+    { X86::VPABSDZrrk,            X86::VPABSDZrmk,            0 },
+    { X86::VPABSQZrrk,            X86::VPABSQZrmk,            0 },
+    { X86::VPABSWZrrk,            X86::VPABSWZrmk,            0 },
+    { X86::VPERMILPDZrik,         X86::VPERMILPDZmik,         0 },
+    { X86::VPERMILPSZrik,         X86::VPERMILPSZmik,         0 },
+    { X86::VPERMPDZrik,           X86::VPERMPDZmik,           0 },
+    { X86::VPERMQZrik,            X86::VPERMQZmik,            0 },
+    { X86::VPMOVSXBDZrrk,         X86::VPMOVSXBDZrmk,         0 },
+    { X86::VPMOVSXBQZrrk,         X86::VPMOVSXBQZrmk,         TB_NO_REVERSE },
+    { X86::VPMOVSXBWZrrk,         X86::VPMOVSXBWZrmk,         0 },
+    { X86::VPMOVSXDQZrrk,         X86::VPMOVSXDQZrmk,         0 },
+    { X86::VPMOVSXWDZrrk,         X86::VPMOVSXWDZrmk,         0 },
+    { X86::VPMOVSXWQZrrk,         X86::VPMOVSXWQZrmk,         0 },
+    { X86::VPMOVZXBDZrrk,         X86::VPMOVZXBDZrmk,         0 },
+    { X86::VPMOVZXBQZrrk,         X86::VPMOVZXBQZrmk,         TB_NO_REVERSE },
+    { X86::VPMOVZXBWZrrk,         X86::VPMOVZXBWZrmk,         0 },
+    { X86::VPMOVZXDQZrrk,         X86::VPMOVZXDQZrmk,         0 },
+    { X86::VPMOVZXWDZrrk,         X86::VPMOVZXWDZrmk,         0 },
+    { X86::VPMOVZXWQZrrk,         X86::VPMOVZXWQZrmk,         0 },
+    { X86::VPOPCNTDZrrk,          X86::VPOPCNTDZrmk,          0 },
+    { X86::VPOPCNTQZrrk,          X86::VPOPCNTQZrmk,          0 },
+    { X86::VPSHUFDZrik,           X86::VPSHUFDZmik,           0 },
+    { X86::VPSHUFHWZrik,          X86::VPSHUFHWZmik,          0 },
+    { X86::VPSHUFLWZrik,          X86::VPSHUFLWZmik,          0 },
+    { X86::VPSLLDZrik,            X86::VPSLLDZmik,            0 },
+    { X86::VPSLLQZrik,            X86::VPSLLQZmik,            0 },
+    { X86::VPSLLWZrik,            X86::VPSLLWZmik,            0 },
+    { X86::VPSRADZrik,            X86::VPSRADZmik,            0 },
+    { X86::VPSRAQZrik,            X86::VPSRAQZmik,            0 },
+    { X86::VPSRAWZrik,            X86::VPSRAWZmik,            0 },
+    { X86::VPSRLDZrik,            X86::VPSRLDZmik,            0 },
+    { X86::VPSRLQZrik,            X86::VPSRLQZmik,            0 },
+    { X86::VPSRLWZrik,            X86::VPSRLWZmik,            0 },
+
+    // AVX-512VL 256-bit masked foldable instructions
+    { X86::VBROADCASTSSZ256rk,    X86::VBROADCASTSSZ256mk,    TB_NO_REVERSE },
+    { X86::VBROADCASTSDZ256rk,    X86::VBROADCASTSDZ256mk,    TB_NO_REVERSE },
+    { X86::VPABSBZ256rrk,         X86::VPABSBZ256rmk,         0 },
+    { X86::VPABSDZ256rrk,         X86::VPABSDZ256rmk,         0 },
+    { X86::VPABSQZ256rrk,         X86::VPABSQZ256rmk,         0 },
+    { X86::VPABSWZ256rrk,         X86::VPABSWZ256rmk,         0 },
+    { X86::VPERMILPDZ256rik,      X86::VPERMILPDZ256mik,      0 },
+    { X86::VPERMILPSZ256rik,      X86::VPERMILPSZ256mik,      0 },
+    { X86::VPERMPDZ256rik,        X86::VPERMPDZ256mik,        0 },
+    { X86::VPERMQZ256rik,         X86::VPERMQZ256mik,         0 },
+    { X86::VPMOVSXBDZ256rrk,      X86::VPMOVSXBDZ256rmk,      TB_NO_REVERSE },
+    { X86::VPMOVSXBQZ256rrk,      X86::VPMOVSXBQZ256rmk,      TB_NO_REVERSE },
+    { X86::VPMOVSXBWZ256rrk,      X86::VPMOVSXBWZ256rmk,      0 },
+    { X86::VPMOVSXDQZ256rrk,      X86::VPMOVSXDQZ256rmk,      0 },
+    { X86::VPMOVSXWDZ256rrk,      X86::VPMOVSXWDZ256rmk,      0 },
+    { X86::VPMOVSXWQZ256rrk,      X86::VPMOVSXWQZ256rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXBDZ256rrk,      X86::VPMOVZXBDZ256rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXBQZ256rrk,      X86::VPMOVZXBQZ256rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXBWZ256rrk,      X86::VPMOVZXBWZ256rmk,      0 },
+    { X86::VPMOVZXDQZ256rrk,      X86::VPMOVZXDQZ256rmk,      0 },
+    { X86::VPMOVZXWDZ256rrk,      X86::VPMOVZXWDZ256rmk,      0 },
+    { X86::VPMOVZXWQZ256rrk,      X86::VPMOVZXWQZ256rmk,      TB_NO_REVERSE },
+    { X86::VPSHUFDZ256rik,        X86::VPSHUFDZ256mik,        0 },
+    { X86::VPSHUFHWZ256rik,       X86::VPSHUFHWZ256mik,       0 },
+    { X86::VPSHUFLWZ256rik,       X86::VPSHUFLWZ256mik,       0 },
+    { X86::VPSLLDZ256rik,         X86::VPSLLDZ256mik,         0 },
+    { X86::VPSLLQZ256rik,         X86::VPSLLQZ256mik,         0 },
+    { X86::VPSLLWZ256rik,         X86::VPSLLWZ256mik,         0 },
+    { X86::VPSRADZ256rik,         X86::VPSRADZ256mik,         0 },
+    { X86::VPSRAQZ256rik,         X86::VPSRAQZ256mik,         0 },
+    { X86::VPSRAWZ256rik,         X86::VPSRAWZ256mik,         0 },
+    { X86::VPSRLDZ256rik,         X86::VPSRLDZ256mik,         0 },
+    { X86::VPSRLQZ256rik,         X86::VPSRLQZ256mik,         0 },
+    { X86::VPSRLWZ256rik,         X86::VPSRLWZ256mik,         0 },
+
+    // AVX-512VL 128-bit masked foldable instructions
+    { X86::VBROADCASTSSZ128rk,    X86::VBROADCASTSSZ128mk,    TB_NO_REVERSE },
+    { X86::VPABSBZ128rrk,         X86::VPABSBZ128rmk,         0 },
+    { X86::VPABSDZ128rrk,         X86::VPABSDZ128rmk,         0 },
+    { X86::VPABSQZ128rrk,         X86::VPABSQZ128rmk,         0 },
+    { X86::VPABSWZ128rrk,         X86::VPABSWZ128rmk,         0 },
+    { X86::VPERMILPDZ128rik,      X86::VPERMILPDZ128mik,      0 },
+    { X86::VPERMILPSZ128rik,      X86::VPERMILPSZ128mik,      0 },
+    { X86::VPMOVSXBDZ128rrk,      X86::VPMOVSXBDZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVSXBQZ128rrk,      X86::VPMOVSXBQZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVSXBWZ128rrk,      X86::VPMOVSXBWZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVSXDQZ128rrk,      X86::VPMOVSXDQZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVSXWDZ128rrk,      X86::VPMOVSXWDZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVSXWQZ128rrk,      X86::VPMOVSXWQZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXBDZ128rrk,      X86::VPMOVZXBDZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXBQZ128rrk,      X86::VPMOVZXBQZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXBWZ128rrk,      X86::VPMOVZXBWZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXDQZ128rrk,      X86::VPMOVZXDQZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXWDZ128rrk,      X86::VPMOVZXWDZ128rmk,      TB_NO_REVERSE },
+    { X86::VPMOVZXWQZ128rrk,      X86::VPMOVZXWQZ128rmk,      TB_NO_REVERSE },
+    { X86::VPSHUFDZ128rik,        X86::VPSHUFDZ128mik,        0 },
+    { X86::VPSHUFHWZ128rik,       X86::VPSHUFHWZ128mik,       0 },
+    { X86::VPSHUFLWZ128rik,       X86::VPSHUFLWZ128mik,       0 },
+    { X86::VPSLLDZ128rik,         X86::VPSLLDZ128mik,         0 },
+    { X86::VPSLLQZ128rik,         X86::VPSLLQZ128mik,         0 },
+    { X86::VPSLLWZ128rik,         X86::VPSLLWZ128mik,         0 },
+    { X86::VPSRADZ128rik,         X86::VPSRADZ128mik,         0 },
+    { X86::VPSRAQZ128rik,         X86::VPSRAQZ128mik,         0 },
+    { X86::VPSRAWZ128rik,         X86::VPSRAWZ128mik,         0 },
+    { X86::VPSRLDZ128rik,         X86::VPSRLDZ128mik,         0 },
+    { X86::VPSRLQZ128rik,         X86::VPSRLQZ128mik,         0 },
+    { X86::VPSRLWZ128rik,         X86::VPSRLWZ128mik,         0 },
+  };
+
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
     AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
                   Entry.RegOp, Entry.MemOp,
                   // Index 3, folded load
                   Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
   }
+  auto I = X86InstrFMA3Info::rm_begin(); // Walk the FMA3 reg/mem opcode pairs.
+  auto E = X86InstrFMA3Info::rm_end();
+  for (; I != E; ++I) {
+    if (!I.getGroup()->isKMasked()) { // k-masked groups are not added here.
+      // Intrinsic forms need to pass TB_NO_REVERSE; the two calls below are
+      if (I.getGroup()->isIntrinsic()) { // otherwise identical.
+        AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+                      I.getRegOpcode(), I.getMemOpcode(),
+                      TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE);
+      } else { // Non-intrinsic form: same entry without TB_NO_REVERSE.
+        AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+                      I.getRegOpcode(), I.getMemOpcode(),
+                      TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD);
+      }
+    }
+  }
+
+  static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
+    // AVX-512 foldable masked instructions
+    { X86::VADDPDZrrk,         X86::VADDPDZrmk,           0 },
+    { X86::VADDPSZrrk,         X86::VADDPSZrmk,           0 },
+    { X86::VADDSDZrr_Intk,     X86::VADDSDZrm_Intk,       TB_NO_REVERSE },
+    { X86::VADDSSZrr_Intk,     X86::VADDSSZrm_Intk,       TB_NO_REVERSE },
+    { X86::VALIGNDZrrik,       X86::VALIGNDZrmik,         0 },
+    { X86::VALIGNQZrrik,       X86::VALIGNQZrmik,         0 },
+    { X86::VANDNPDZrrk,        X86::VANDNPDZrmk,          0 },
+    { X86::VANDNPSZrrk,        X86::VANDNPSZrmk,          0 },
+    { X86::VANDPDZrrk,         X86::VANDPDZrmk,           0 },
+    { X86::VANDPSZrrk,         X86::VANDPSZrmk,           0 },
+    { X86::VDIVPDZrrk,         X86::VDIVPDZrmk,           0 },
+    { X86::VDIVPSZrrk,         X86::VDIVPSZrmk,           0 },
+    { X86::VDIVSDZrr_Intk,     X86::VDIVSDZrm_Intk,       TB_NO_REVERSE },
+    { X86::VDIVSSZrr_Intk,     X86::VDIVSSZrm_Intk,       TB_NO_REVERSE },
+    { X86::VINSERTF32x4Zrrk,   X86::VINSERTF32x4Zrmk,     0 },
+    { X86::VINSERTF32x8Zrrk,   X86::VINSERTF32x8Zrmk,     0 },
+    { X86::VINSERTF64x2Zrrk,   X86::VINSERTF64x2Zrmk,     0 },
+    { X86::VINSERTF64x4Zrrk,   X86::VINSERTF64x4Zrmk,     0 },
+    { X86::VINSERTI32x4Zrrk,   X86::VINSERTI32x4Zrmk,     0 },
+    { X86::VINSERTI32x8Zrrk,   X86::VINSERTI32x8Zrmk,     0 },
+    { X86::VINSERTI64x2Zrrk,   X86::VINSERTI64x2Zrmk,     0 },
+    { X86::VINSERTI64x4Zrrk,   X86::VINSERTI64x4Zrmk,     0 },
+    { X86::VMAXCPDZrrk,        X86::VMAXCPDZrmk,          0 },
+    { X86::VMAXCPSZrrk,        X86::VMAXCPSZrmk,          0 },
+    { X86::VMAXPDZrrk,         X86::VMAXPDZrmk,           0 },
+    { X86::VMAXPSZrrk,         X86::VMAXPSZrmk,           0 },
+    { X86::VMAXSDZrr_Intk,     X86::VMAXSDZrm_Intk,       TB_NO_REVERSE },
+    { X86::VMAXSSZrr_Intk,     X86::VMAXSSZrm_Intk,       TB_NO_REVERSE },
+    { X86::VMINCPDZrrk,        X86::VMINCPDZrmk,          0 },
+    { X86::VMINCPSZrrk,        X86::VMINCPSZrmk,          0 },
+    { X86::VMINPDZrrk,         X86::VMINPDZrmk,           0 },
+    { X86::VMINPSZrrk,         X86::VMINPSZrmk,           0 },
+    { X86::VMINSDZrr_Intk,     X86::VMINSDZrm_Intk,       TB_NO_REVERSE },
+    { X86::VMINSSZrr_Intk,     X86::VMINSSZrm_Intk,       TB_NO_REVERSE },
+    { X86::VMULPDZrrk,         X86::VMULPDZrmk,           0 },
+    { X86::VMULPSZrrk,         X86::VMULPSZrmk,           0 },
+    { X86::VMULSDZrr_Intk,     X86::VMULSDZrm_Intk,       TB_NO_REVERSE },
+    { X86::VMULSSZrr_Intk,     X86::VMULSSZrm_Intk,       TB_NO_REVERSE },
+    { X86::VORPDZrrk,          X86::VORPDZrmk,            0 },
+    { X86::VORPSZrrk,          X86::VORPSZrmk,            0 },
+    { X86::VPACKSSDWZrrk,      X86::VPACKSSDWZrmk,        0 },
+    { X86::VPACKSSWBZrrk,      X86::VPACKSSWBZrmk,        0 },
+    { X86::VPACKUSDWZrrk,      X86::VPACKUSDWZrmk,        0 },
+    { X86::VPACKUSWBZrrk,      X86::VPACKUSWBZrmk,        0 },
+    { X86::VPADDBZrrk,         X86::VPADDBZrmk,           0 },
+    { X86::VPADDDZrrk,         X86::VPADDDZrmk,           0 },
+    { X86::VPADDQZrrk,         X86::VPADDQZrmk,           0 },
+    { X86::VPADDSBZrrk,        X86::VPADDSBZrmk,          0 },
+    { X86::VPADDSWZrrk,        X86::VPADDSWZrmk,          0 },
+    { X86::VPADDUSBZrrk,       X86::VPADDUSBZrmk,         0 },
+    { X86::VPADDUSWZrrk,       X86::VPADDUSWZrmk,         0 },
+    { X86::VPADDWZrrk,         X86::VPADDWZrmk,           0 },
+    { X86::VPALIGNRZrrik,      X86::VPALIGNRZrmik,        0 },
+    { X86::VPANDDZrrk,         X86::VPANDDZrmk,           0 },
+    { X86::VPANDNDZrrk,        X86::VPANDNDZrmk,          0 },
+    { X86::VPANDNQZrrk,        X86::VPANDNQZrmk,          0 },
+    { X86::VPANDQZrrk,         X86::VPANDQZrmk,           0 },
+    { X86::VPAVGBZrrk,         X86::VPAVGBZrmk,           0 },
+    { X86::VPAVGWZrrk,         X86::VPAVGWZrmk,           0 },
+    { X86::VPERMBZrrk,         X86::VPERMBZrmk,           0 },
+    { X86::VPERMDZrrk,         X86::VPERMDZrmk,           0 },
+    { X86::VPERMI2Brrk,        X86::VPERMI2Brmk,          0 },
+    { X86::VPERMI2Drrk,        X86::VPERMI2Drmk,          0 },
+    { X86::VPERMI2PSrrk,       X86::VPERMI2PSrmk,         0 },
+    { X86::VPERMI2PDrrk,       X86::VPERMI2PDrmk,         0 },
+    { X86::VPERMI2Qrrk,        X86::VPERMI2Qrmk,          0 },
+    { X86::VPERMI2Wrrk,        X86::VPERMI2Wrmk,          0 },
+    { X86::VPERMILPDZrrk,      X86::VPERMILPDZrmk,        0 },
+    { X86::VPERMILPSZrrk,      X86::VPERMILPSZrmk,        0 },
+    { X86::VPERMPDZrrk,        X86::VPERMPDZrmk,          0 },
+    { X86::VPERMPSZrrk,        X86::VPERMPSZrmk,          0 },
+    { X86::VPERMQZrrk,         X86::VPERMQZrmk,           0 },
+    { X86::VPERMT2Brrk,        X86::VPERMT2Brmk,          0 },
+    { X86::VPERMT2Drrk,        X86::VPERMT2Drmk,          0 },
+    { X86::VPERMT2PSrrk,       X86::VPERMT2PSrmk,         0 },
+    { X86::VPERMT2PDrrk,       X86::VPERMT2PDrmk,         0 },
+    { X86::VPERMT2Qrrk,        X86::VPERMT2Qrmk,          0 },
+    { X86::VPERMT2Wrrk,        X86::VPERMT2Wrmk,          0 },
+    { X86::VPERMWZrrk,         X86::VPERMWZrmk,           0 },
+    { X86::VPMADDUBSWZrrk,     X86::VPMADDUBSWZrmk,       0 },
+    { X86::VPMADDWDZrrk,       X86::VPMADDWDZrmk,         0 },
+    { X86::VPMAXSBZrrk,        X86::VPMAXSBZrmk,          0 },
+    { X86::VPMAXSDZrrk,        X86::VPMAXSDZrmk,          0 },
+    { X86::VPMAXSQZrrk,        X86::VPMAXSQZrmk,          0 },
+    { X86::VPMAXSWZrrk,        X86::VPMAXSWZrmk,          0 },
+    { X86::VPMAXUBZrrk,        X86::VPMAXUBZrmk,          0 },
+    { X86::VPMAXUDZrrk,        X86::VPMAXUDZrmk,          0 },
+    { X86::VPMAXUQZrrk,        X86::VPMAXUQZrmk,          0 },
+    { X86::VPMAXUWZrrk,        X86::VPMAXUWZrmk,          0 },
+    { X86::VPMINSBZrrk,        X86::VPMINSBZrmk,          0 },
+    { X86::VPMINSDZrrk,        X86::VPMINSDZrmk,          0 },
+    { X86::VPMINSQZrrk,        X86::VPMINSQZrmk,          0 },
+    { X86::VPMINSWZrrk,        X86::VPMINSWZrmk,          0 },
+    { X86::VPMINUBZrrk,        X86::VPMINUBZrmk,          0 },
+    { X86::VPMINUDZrrk,        X86::VPMINUDZrmk,          0 },
+    { X86::VPMINUQZrrk,        X86::VPMINUQZrmk,          0 },
+    { X86::VPMINUWZrrk,        X86::VPMINUWZrmk,          0 },
+    { X86::VPMULDQZrrk,        X86::VPMULDQZrmk,          0 },
+    { X86::VPMULLDZrrk,        X86::VPMULLDZrmk,          0 },
+    { X86::VPMULLQZrrk,        X86::VPMULLQZrmk,          0 },
+    { X86::VPMULLWZrrk,        X86::VPMULLWZrmk,          0 },
+    { X86::VPMULUDQZrrk,       X86::VPMULUDQZrmk,         0 },
+    { X86::VPORDZrrk,          X86::VPORDZrmk,            0 },
+    { X86::VPORQZrrk,          X86::VPORQZrmk,            0 },
+    { X86::VPSHUFBZrrk,        X86::VPSHUFBZrmk,          0 },
+    { X86::VPSLLDZrrk,         X86::VPSLLDZrmk,           0 },
+    { X86::VPSLLQZrrk,         X86::VPSLLQZrmk,           0 },
+    { X86::VPSLLVDZrrk,        X86::VPSLLVDZrmk,          0 },
+    { X86::VPSLLVQZrrk,        X86::VPSLLVQZrmk,          0 },
+    { X86::VPSLLVWZrrk,        X86::VPSLLVWZrmk,          0 },
+    { X86::VPSLLWZrrk,         X86::VPSLLWZrmk,           0 },
+    { X86::VPSRADZrrk,         X86::VPSRADZrmk,           0 },
+    { X86::VPSRAQZrrk,         X86::VPSRAQZrmk,           0 },
+    { X86::VPSRAVDZrrk,        X86::VPSRAVDZrmk,          0 },
+    { X86::VPSRAVQZrrk,        X86::VPSRAVQZrmk,          0 },
+    { X86::VPSRAVWZrrk,        X86::VPSRAVWZrmk,          0 },
+    { X86::VPSRAWZrrk,         X86::VPSRAWZrmk,           0 },
+    { X86::VPSRLDZrrk,         X86::VPSRLDZrmk,           0 },
+    { X86::VPSRLQZrrk,         X86::VPSRLQZrmk,           0 },
+    { X86::VPSRLVDZrrk,        X86::VPSRLVDZrmk,          0 },
+    { X86::VPSRLVQZrrk,        X86::VPSRLVQZrmk,          0 },
+    { X86::VPSRLVWZrrk,        X86::VPSRLVWZrmk,          0 },
+    { X86::VPSRLWZrrk,         X86::VPSRLWZrmk,           0 },
+    { X86::VPSUBBZrrk,         X86::VPSUBBZrmk,           0 },
+    { X86::VPSUBDZrrk,         X86::VPSUBDZrmk,           0 },
+    { X86::VPSUBQZrrk,         X86::VPSUBQZrmk,           0 },
+    { X86::VPSUBSBZrrk,        X86::VPSUBSBZrmk,          0 },
+    { X86::VPSUBSWZrrk,        X86::VPSUBSWZrmk,          0 },
+    { X86::VPSUBUSBZrrk,       X86::VPSUBUSBZrmk,         0 },
+    { X86::VPSUBUSWZrrk,       X86::VPSUBUSWZrmk,         0 },
+    { X86::VPTERNLOGDZrrik,    X86::VPTERNLOGDZrmik,      0 },
+    { X86::VPTERNLOGQZrrik,    X86::VPTERNLOGQZrmik,      0 },
+    { X86::VPUNPCKHBWZrrk,     X86::VPUNPCKHBWZrmk,       0 },
+    { X86::VPUNPCKHDQZrrk,     X86::VPUNPCKHDQZrmk,       0 },
+    { X86::VPUNPCKHQDQZrrk,    X86::VPUNPCKHQDQZrmk,      0 },
+    { X86::VPUNPCKHWDZrrk,     X86::VPUNPCKHWDZrmk,       0 },
+    { X86::VPUNPCKLBWZrrk,     X86::VPUNPCKLBWZrmk,       0 },
+    { X86::VPUNPCKLDQZrrk,     X86::VPUNPCKLDQZrmk,       0 },
+    { X86::VPUNPCKLQDQZrrk,    X86::VPUNPCKLQDQZrmk,      0 },
+    { X86::VPUNPCKLWDZrrk,     X86::VPUNPCKLWDZrmk,       0 },
+    { X86::VPXORDZrrk,         X86::VPXORDZrmk,           0 },
+    { X86::VPXORQZrrk,         X86::VPXORQZrmk,           0 },
+    { X86::VSHUFPDZrrik,       X86::VSHUFPDZrmik,         0 },
+    { X86::VSHUFPSZrrik,       X86::VSHUFPSZrmik,         0 },
+    { X86::VSUBPDZrrk,         X86::VSUBPDZrmk,           0 },
+    { X86::VSUBPSZrrk,         X86::VSUBPSZrmk,           0 },
+    { X86::VSUBSDZrr_Intk,     X86::VSUBSDZrm_Intk,       TB_NO_REVERSE },
+    { X86::VSUBSSZrr_Intk,     X86::VSUBSSZrm_Intk,       TB_NO_REVERSE },
+    { X86::VUNPCKHPDZrrk,      X86::VUNPCKHPDZrmk,        0 },
+    { X86::VUNPCKHPSZrrk,      X86::VUNPCKHPSZrmk,        0 },
+    { X86::VUNPCKLPDZrrk,      X86::VUNPCKLPDZrmk,        0 },
+    { X86::VUNPCKLPSZrrk,      X86::VUNPCKLPSZrmk,        0 },
+    { X86::VXORPDZrrk,         X86::VXORPDZrmk,           0 },
+    { X86::VXORPSZrrk,         X86::VXORPSZrmk,           0 },
+
+    // AVX-512{F,VL} foldable masked instructions 256-bit
+    { X86::VADDPDZ256rrk,      X86::VADDPDZ256rmk,        0 },
+    { X86::VADDPSZ256rrk,      X86::VADDPSZ256rmk,        0 },
+    { X86::VALIGNDZ256rrik,    X86::VALIGNDZ256rmik,      0 },
+    { X86::VALIGNQZ256rrik,    X86::VALIGNQZ256rmik,      0 },
+    { X86::VANDNPDZ256rrk,     X86::VANDNPDZ256rmk,       0 },
+    { X86::VANDNPSZ256rrk,     X86::VANDNPSZ256rmk,       0 },
+    { X86::VANDPDZ256rrk,      X86::VANDPDZ256rmk,        0 },
+    { X86::VANDPSZ256rrk,      X86::VANDPSZ256rmk,        0 },
+    { X86::VDIVPDZ256rrk,      X86::VDIVPDZ256rmk,        0 },
+    { X86::VDIVPSZ256rrk,      X86::VDIVPSZ256rmk,        0 },
+    { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk,  0 },
+    { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk,  0 },
+    { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk,  0 },
+    { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk,  0 },
+    { X86::VMAXCPDZ256rrk,     X86::VMAXCPDZ256rmk,       0 },
+    { X86::VMAXCPSZ256rrk,     X86::VMAXCPSZ256rmk,       0 },
+    { X86::VMAXPDZ256rrk,      X86::VMAXPDZ256rmk,        0 },
+    { X86::VMAXPSZ256rrk,      X86::VMAXPSZ256rmk,        0 },
+    { X86::VMINCPDZ256rrk,     X86::VMINCPDZ256rmk,       0 },
+    { X86::VMINCPSZ256rrk,     X86::VMINCPSZ256rmk,       0 },
+    { X86::VMINPDZ256rrk,      X86::VMINPDZ256rmk,        0 },
+    { X86::VMINPSZ256rrk,      X86::VMINPSZ256rmk,        0 },
+    { X86::VMULPDZ256rrk,      X86::VMULPDZ256rmk,        0 },
+    { X86::VMULPSZ256rrk,      X86::VMULPSZ256rmk,        0 },
+    { X86::VORPDZ256rrk,       X86::VORPDZ256rmk,         0 },
+    { X86::VORPSZ256rrk,       X86::VORPSZ256rmk,         0 },
+    { X86::VPACKSSDWZ256rrk,   X86::VPACKSSDWZ256rmk,     0 },
+    { X86::VPACKSSWBZ256rrk,   X86::VPACKSSWBZ256rmk,     0 },
+    { X86::VPACKUSDWZ256rrk,   X86::VPACKUSDWZ256rmk,     0 },
+    { X86::VPACKUSWBZ256rrk,   X86::VPACKUSWBZ256rmk,     0 },
+    { X86::VPADDBZ256rrk,      X86::VPADDBZ256rmk,        0 },
+    { X86::VPADDDZ256rrk,      X86::VPADDDZ256rmk,        0 },
+    { X86::VPADDQZ256rrk,      X86::VPADDQZ256rmk,        0 },
+    { X86::VPADDSBZ256rrk,     X86::VPADDSBZ256rmk,       0 },
+    { X86::VPADDSWZ256rrk,     X86::VPADDSWZ256rmk,       0 },
+    { X86::VPADDUSBZ256rrk,    X86::VPADDUSBZ256rmk,      0 },
+    { X86::VPADDUSWZ256rrk,    X86::VPADDUSWZ256rmk,      0 },
+    { X86::VPADDWZ256rrk,      X86::VPADDWZ256rmk,        0 },
+    { X86::VPALIGNRZ256rrik,   X86::VPALIGNRZ256rmik,     0 },
+    { X86::VPANDDZ256rrk,      X86::VPANDDZ256rmk,        0 },
+    { X86::VPANDNDZ256rrk,     X86::VPANDNDZ256rmk,       0 },
+    { X86::VPANDNQZ256rrk,     X86::VPANDNQZ256rmk,       0 },
+    { X86::VPANDQZ256rrk,      X86::VPANDQZ256rmk,        0 },
+    { X86::VPAVGBZ256rrk,      X86::VPAVGBZ256rmk,        0 },
+    { X86::VPAVGWZ256rrk,      X86::VPAVGWZ256rmk,        0 },
+    { X86::VPERMBZ256rrk,      X86::VPERMBZ256rmk,        0 },
+    { X86::VPERMDZ256rrk,      X86::VPERMDZ256rmk,        0 },
+    { X86::VPERMI2B256rrk,     X86::VPERMI2B256rmk,       0 },
+    { X86::VPERMI2D256rrk,     X86::VPERMI2D256rmk,       0 },
+    { X86::VPERMI2PD256rrk,    X86::VPERMI2PD256rmk,      0 },
+    { X86::VPERMI2PS256rrk,    X86::VPERMI2PS256rmk,      0 },
+    { X86::VPERMI2Q256rrk,     X86::VPERMI2Q256rmk,       0 },
+    { X86::VPERMI2W256rrk,     X86::VPERMI2W256rmk,       0 },
+    { X86::VPERMILPDZ256rrk,   X86::VPERMILPDZ256rmk,     0 },
+    { X86::VPERMILPSZ256rrk,   X86::VPERMILPSZ256rmk,     0 },
+    { X86::VPERMPDZ256rrk,     X86::VPERMPDZ256rmk,       0 },
+    { X86::VPERMPSZ256rrk,     X86::VPERMPSZ256rmk,       0 },
+    { X86::VPERMQZ256rrk,      X86::VPERMQZ256rmk,        0 },
+    { X86::VPERMT2B256rrk,     X86::VPERMT2B256rmk,       0 },
+    { X86::VPERMT2D256rrk,     X86::VPERMT2D256rmk,       0 },
+    { X86::VPERMT2PD256rrk,    X86::VPERMT2PD256rmk,      0 },
+    { X86::VPERMT2PS256rrk,    X86::VPERMT2PS256rmk,      0 },
+    { X86::VPERMT2Q256rrk,     X86::VPERMT2Q256rmk,       0 },
+    { X86::VPERMT2W256rrk,     X86::VPERMT2W256rmk,       0 },
+    { X86::VPERMWZ256rrk,      X86::VPERMWZ256rmk,        0 },
+    { X86::VPMADDUBSWZ256rrk,  X86::VPMADDUBSWZ256rmk,    0 },
+    { X86::VPMADDWDZ256rrk,    X86::VPMADDWDZ256rmk,      0 },
+    { X86::VPMAXSBZ256rrk,     X86::VPMAXSBZ256rmk,       0 },
+    { X86::VPMAXSDZ256rrk,     X86::VPMAXSDZ256rmk,       0 },
+    { X86::VPMAXSQZ256rrk,     X86::VPMAXSQZ256rmk,       0 },
+    { X86::VPMAXSWZ256rrk,     X86::VPMAXSWZ256rmk,       0 },
+    { X86::VPMAXUBZ256rrk,     X86::VPMAXUBZ256rmk,       0 },
+    { X86::VPMAXUDZ256rrk,     X86::VPMAXUDZ256rmk,       0 },
+    { X86::VPMAXUQZ256rrk,     X86::VPMAXUQZ256rmk,       0 },
+    { X86::VPMAXUWZ256rrk,     X86::VPMAXUWZ256rmk,       0 },
+    { X86::VPMINSBZ256rrk,     X86::VPMINSBZ256rmk,       0 },
+    { X86::VPMINSDZ256rrk,     X86::VPMINSDZ256rmk,       0 },
+    { X86::VPMINSQZ256rrk,     X86::VPMINSQZ256rmk,       0 },
+    { X86::VPMINSWZ256rrk,     X86::VPMINSWZ256rmk,       0 },
+    { X86::VPMINUBZ256rrk,     X86::VPMINUBZ256rmk,       0 },
+    { X86::VPMINUDZ256rrk,     X86::VPMINUDZ256rmk,       0 },
+    { X86::VPMINUQZ256rrk,     X86::VPMINUQZ256rmk,       0 },
+    { X86::VPMINUWZ256rrk,     X86::VPMINUWZ256rmk,       0 },
+    { X86::VPMULDQZ256rrk,     X86::VPMULDQZ256rmk,       0 },
+    { X86::VPMULLDZ256rrk,     X86::VPMULLDZ256rmk,       0 },
+    { X86::VPMULLQZ256rrk,     X86::VPMULLQZ256rmk,       0 },
+    { X86::VPMULLWZ256rrk,     X86::VPMULLWZ256rmk,       0 },
+    { X86::VPMULUDQZ256rrk,    X86::VPMULUDQZ256rmk,      0 },
+    { X86::VPORDZ256rrk,       X86::VPORDZ256rmk,         0 },
+    { X86::VPORQZ256rrk,       X86::VPORQZ256rmk,         0 },
+    { X86::VPSHUFBZ256rrk,     X86::VPSHUFBZ256rmk,       0 },
+    { X86::VPSLLDZ256rrk,      X86::VPSLLDZ256rmk,        0 },
+    { X86::VPSLLQZ256rrk,      X86::VPSLLQZ256rmk,        0 },
+    { X86::VPSLLVDZ256rrk,     X86::VPSLLVDZ256rmk,       0 },
+    { X86::VPSLLVQZ256rrk,     X86::VPSLLVQZ256rmk,       0 },
+    { X86::VPSLLVWZ256rrk,     X86::VPSLLVWZ256rmk,       0 },
+    { X86::VPSLLWZ256rrk,      X86::VPSLLWZ256rmk,        0 },
+    { X86::VPSRADZ256rrk,      X86::VPSRADZ256rmk,        0 },
+    { X86::VPSRAQZ256rrk,      X86::VPSRAQZ256rmk,        0 },
+    { X86::VPSRAVDZ256rrk,     X86::VPSRAVDZ256rmk,       0 },
+    { X86::VPSRAVQZ256rrk,     X86::VPSRAVQZ256rmk,       0 },
+    { X86::VPSRAVWZ256rrk,     X86::VPSRAVWZ256rmk,       0 },
+    { X86::VPSRAWZ256rrk,      X86::VPSRAWZ256rmk,        0 },
+    { X86::VPSRLDZ256rrk,      X86::VPSRLDZ256rmk,        0 },
+    { X86::VPSRLQZ256rrk,      X86::VPSRLQZ256rmk,        0 },
+    { X86::VPSRLVDZ256rrk,     X86::VPSRLVDZ256rmk,       0 },
+    { X86::VPSRLVQZ256rrk,     X86::VPSRLVQZ256rmk,       0 },
+    { X86::VPSRLVWZ256rrk,     X86::VPSRLVWZ256rmk,       0 },
+    { X86::VPSRLWZ256rrk,      X86::VPSRLWZ256rmk,        0 },
+    { X86::VPSUBBZ256rrk,      X86::VPSUBBZ256rmk,        0 },
+    { X86::VPSUBDZ256rrk,      X86::VPSUBDZ256rmk,        0 },
+    { X86::VPSUBQZ256rrk,      X86::VPSUBQZ256rmk,        0 },
+    { X86::VPSUBSBZ256rrk,     X86::VPSUBSBZ256rmk,       0 },
+    { X86::VPSUBSWZ256rrk,     X86::VPSUBSWZ256rmk,       0 },
+    { X86::VPSUBUSBZ256rrk,    X86::VPSUBUSBZ256rmk,      0 },
+    { X86::VPSUBUSWZ256rrk,    X86::VPSUBUSWZ256rmk,      0 },
+    { X86::VPSUBWZ256rrk,      X86::VPSUBWZ256rmk,        0 },
+    { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik,   0 },
+    { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik,   0 },
+    { X86::VPUNPCKHBWZ256rrk,  X86::VPUNPCKHBWZ256rmk,    0 },
+    { X86::VPUNPCKHDQZ256rrk,  X86::VPUNPCKHDQZ256rmk,    0 },
+    { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk,   0 },
+    { X86::VPUNPCKHWDZ256rrk,  X86::VPUNPCKHWDZ256rmk,    0 },
+    { X86::VPUNPCKLBWZ256rrk,  X86::VPUNPCKLBWZ256rmk,    0 },
+    { X86::VPUNPCKLDQZ256rrk,  X86::VPUNPCKLDQZ256rmk,    0 },
+    { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk,   0 },
+    { X86::VPUNPCKLWDZ256rrk,  X86::VPUNPCKLWDZ256rmk,    0 },
+    { X86::VPXORDZ256rrk,      X86::VPXORDZ256rmk,        0 },
+    { X86::VPXORQZ256rrk,      X86::VPXORQZ256rmk,        0 },
+    { X86::VSHUFPDZ256rrik,    X86::VSHUFPDZ256rmik,      0 },
+    { X86::VSHUFPSZ256rrik,    X86::VSHUFPSZ256rmik,      0 },
+    { X86::VSUBPDZ256rrk,      X86::VSUBPDZ256rmk,        0 },
+    { X86::VSUBPSZ256rrk,      X86::VSUBPSZ256rmk,        0 },
+    { X86::VUNPCKHPDZ256rrk,   X86::VUNPCKHPDZ256rmk,     0 },
+    { X86::VUNPCKHPSZ256rrk,   X86::VUNPCKHPSZ256rmk,     0 },
+    { X86::VUNPCKLPDZ256rrk,   X86::VUNPCKLPDZ256rmk,     0 },
+    { X86::VUNPCKLPSZ256rrk,   X86::VUNPCKLPSZ256rmk,     0 },
+    { X86::VXORPDZ256rrk,      X86::VXORPDZ256rmk,        0 },
+    { X86::VXORPSZ256rrk,      X86::VXORPSZ256rmk,        0 },
+
+    // AVX-512{F,VL} foldable masked instructions 128-bit
+    { X86::VADDPDZ128rrk,      X86::VADDPDZ128rmk,        0 },
+    { X86::VADDPSZ128rrk,      X86::VADDPSZ128rmk,        0 },
+    { X86::VALIGNDZ128rrik,    X86::VALIGNDZ128rmik,      0 },
+    { X86::VALIGNQZ128rrik,    X86::VALIGNQZ128rmik,      0 },
+    { X86::VANDNPDZ128rrk,     X86::VANDNPDZ128rmk,       0 },
+    { X86::VANDNPSZ128rrk,     X86::VANDNPSZ128rmk,       0 },
+    { X86::VANDPDZ128rrk,      X86::VANDPDZ128rmk,        0 },
+    { X86::VANDPSZ128rrk,      X86::VANDPSZ128rmk,        0 },
+    { X86::VDIVPDZ128rrk,      X86::VDIVPDZ128rmk,        0 },
+    { X86::VDIVPSZ128rrk,      X86::VDIVPSZ128rmk,        0 },
+    { X86::VMAXCPDZ128rrk,     X86::VMAXCPDZ128rmk,       0 },
+    { X86::VMAXCPSZ128rrk,     X86::VMAXCPSZ128rmk,       0 },
+    { X86::VMAXPDZ128rrk,      X86::VMAXPDZ128rmk,        0 },
+    { X86::VMAXPSZ128rrk,      X86::VMAXPSZ128rmk,        0 },
+    { X86::VMINCPDZ128rrk,     X86::VMINCPDZ128rmk,       0 },
+    { X86::VMINCPSZ128rrk,     X86::VMINCPSZ128rmk,       0 },
+    { X86::VMINPDZ128rrk,      X86::VMINPDZ128rmk,        0 },
+    { X86::VMINPSZ128rrk,      X86::VMINPSZ128rmk,        0 },
+    { X86::VMULPDZ128rrk,      X86::VMULPDZ128rmk,        0 },
+    { X86::VMULPSZ128rrk,      X86::VMULPSZ128rmk,        0 },
+    { X86::VORPDZ128rrk,       X86::VORPDZ128rmk,         0 },
+    { X86::VORPSZ128rrk,       X86::VORPSZ128rmk,         0 },
+    { X86::VPACKSSDWZ128rrk,   X86::VPACKSSDWZ128rmk,     0 },
+    { X86::VPACKSSWBZ128rrk,   X86::VPACKSSWBZ128rmk,     0 },
+    { X86::VPACKUSDWZ128rrk,   X86::VPACKUSDWZ128rmk,     0 },
+    { X86::VPACKUSWBZ128rrk,   X86::VPACKUSWBZ128rmk,     0 },
+    { X86::VPADDBZ128rrk,      X86::VPADDBZ128rmk,        0 },
+    { X86::VPADDDZ128rrk,      X86::VPADDDZ128rmk,        0 },
+    { X86::VPADDQZ128rrk,      X86::VPADDQZ128rmk,        0 },
+    { X86::VPADDSBZ128rrk,     X86::VPADDSBZ128rmk,       0 },
+    { X86::VPADDSWZ128rrk,     X86::VPADDSWZ128rmk,       0 },
+    { X86::VPADDUSBZ128rrk,    X86::VPADDUSBZ128rmk,      0 },
+    { X86::VPADDUSWZ128rrk,    X86::VPADDUSWZ128rmk,      0 },
+    { X86::VPADDWZ128rrk,      X86::VPADDWZ128rmk,        0 },
+    { X86::VPALIGNRZ128rrik,   X86::VPALIGNRZ128rmik,     0 },
+    { X86::VPANDDZ128rrk,      X86::VPANDDZ128rmk,        0 },
+    { X86::VPANDNDZ128rrk,     X86::VPANDNDZ128rmk,       0 },
+    { X86::VPANDNQZ128rrk,     X86::VPANDNQZ128rmk,       0 },
+    { X86::VPANDQZ128rrk,      X86::VPANDQZ128rmk,        0 },
+    { X86::VPAVGBZ128rrk,      X86::VPAVGBZ128rmk,        0 },
+    { X86::VPAVGWZ128rrk,      X86::VPAVGWZ128rmk,        0 },
+    { X86::VPERMBZ128rrk,      X86::VPERMBZ128rmk,        0 },
+    { X86::VPERMI2B128rrk,     X86::VPERMI2B128rmk,       0 },
+    { X86::VPERMI2D128rrk,     X86::VPERMI2D128rmk,       0 },
+    { X86::VPERMI2PD128rrk,    X86::VPERMI2PD128rmk,      0 },
+    { X86::VPERMI2PS128rrk,    X86::VPERMI2PS128rmk,      0 },
+    { X86::VPERMI2Q128rrk,     X86::VPERMI2Q128rmk,       0 },
+    { X86::VPERMI2W128rrk,     X86::VPERMI2W128rmk,       0 },
+    { X86::VPERMILPDZ128rrk,   X86::VPERMILPDZ128rmk,     0 },
+    { X86::VPERMILPSZ128rrk,   X86::VPERMILPSZ128rmk,     0 },
+    { X86::VPERMT2B128rrk,     X86::VPERMT2B128rmk,       0 },
+    { X86::VPERMT2D128rrk,     X86::VPERMT2D128rmk,       0 },
+    { X86::VPERMT2PD128rrk,    X86::VPERMT2PD128rmk,      0 },
+    { X86::VPERMT2PS128rrk,    X86::VPERMT2PS128rmk,      0 },
+    { X86::VPERMT2Q128rrk,     X86::VPERMT2Q128rmk,       0 },
+    { X86::VPERMT2W128rrk,     X86::VPERMT2W128rmk,       0 },
+    { X86::VPERMWZ128rrk,      X86::VPERMWZ128rmk,        0 },
+    { X86::VPMADDUBSWZ128rrk,  X86::VPMADDUBSWZ128rmk,    0 },
+    { X86::VPMADDWDZ128rrk,    X86::VPMADDWDZ128rmk,      0 },
+    { X86::VPMAXSBZ128rrk,     X86::VPMAXSBZ128rmk,       0 },
+    { X86::VPMAXSDZ128rrk,     X86::VPMAXSDZ128rmk,       0 },
+    { X86::VPMAXSQZ128rrk,     X86::VPMAXSQZ128rmk,       0 },
+    { X86::VPMAXSWZ128rrk,     X86::VPMAXSWZ128rmk,       0 },
+    { X86::VPMAXUBZ128rrk,     X86::VPMAXUBZ128rmk,       0 },
+    { X86::VPMAXUDZ128rrk,     X86::VPMAXUDZ128rmk,       0 },
+    { X86::VPMAXUQZ128rrk,     X86::VPMAXUQZ128rmk,       0 },
+    { X86::VPMAXUWZ128rrk,     X86::VPMAXUWZ128rmk,       0 },
+    { X86::VPMINSBZ128rrk,     X86::VPMINSBZ128rmk,       0 },
+    { X86::VPMINSDZ128rrk,     X86::VPMINSDZ128rmk,       0 },
+    { X86::VPMINSQZ128rrk,     X86::VPMINSQZ128rmk,       0 },
+    { X86::VPMINSWZ128rrk,     X86::VPMINSWZ128rmk,       0 },
+    { X86::VPMINUBZ128rrk,     X86::VPMINUBZ128rmk,       0 },
+    { X86::VPMINUDZ128rrk,     X86::VPMINUDZ128rmk,       0 },
+    { X86::VPMINUQZ128rrk,     X86::VPMINUQZ128rmk,       0 },
+    { X86::VPMINUWZ128rrk,     X86::VPMINUWZ128rmk,       0 },
+    { X86::VPMULDQZ128rrk,     X86::VPMULDQZ128rmk,       0 },
+    { X86::VPMULLDZ128rrk,     X86::VPMULLDZ128rmk,       0 },
+    { X86::VPMULLQZ128rrk,     X86::VPMULLQZ128rmk,       0 },
+    { X86::VPMULLWZ128rrk,     X86::VPMULLWZ128rmk,       0 },
+    { X86::VPMULUDQZ128rrk,    X86::VPMULUDQZ128rmk,      0 },
+    { X86::VPORDZ128rrk,       X86::VPORDZ128rmk,         0 },
+    { X86::VPORQZ128rrk,       X86::VPORQZ128rmk,         0 },
+    { X86::VPSHUFBZ128rrk,     X86::VPSHUFBZ128rmk,       0 },
+    { X86::VPSLLDZ128rrk,      X86::VPSLLDZ128rmk,        0 },
+    { X86::VPSLLQZ128rrk,      X86::VPSLLQZ128rmk,        0 },
+    { X86::VPSLLVDZ128rrk,     X86::VPSLLVDZ128rmk,       0 },
+    { X86::VPSLLVQZ128rrk,     X86::VPSLLVQZ128rmk,       0 },
+    { X86::VPSLLVWZ128rrk,     X86::VPSLLVWZ128rmk,       0 },
+    { X86::VPSLLWZ128rrk,      X86::VPSLLWZ128rmk,        0 },
+    { X86::VPSRADZ128rrk,      X86::VPSRADZ128rmk,        0 },
+    { X86::VPSRAQZ128rrk,      X86::VPSRAQZ128rmk,        0 },
+    { X86::VPSRAVDZ128rrk,     X86::VPSRAVDZ128rmk,       0 },
+    { X86::VPSRAVQZ128rrk,     X86::VPSRAVQZ128rmk,       0 },
+    { X86::VPSRAVWZ128rrk,     X86::VPSRAVWZ128rmk,       0 },
+    { X86::VPSRAWZ128rrk,      X86::VPSRAWZ128rmk,        0 },
+    { X86::VPSRLDZ128rrk,      X86::VPSRLDZ128rmk,        0 },
+    { X86::VPSRLQZ128rrk,      X86::VPSRLQZ128rmk,        0 },
+    { X86::VPSRLVDZ128rrk,     X86::VPSRLVDZ128rmk,       0 },
+    { X86::VPSRLVQZ128rrk,     X86::VPSRLVQZ128rmk,       0 },
+    { X86::VPSRLVWZ128rrk,     X86::VPSRLVWZ128rmk,       0 },
+    { X86::VPSRLWZ128rrk,      X86::VPSRLWZ128rmk,        0 },
+    { X86::VPSUBBZ128rrk,      X86::VPSUBBZ128rmk,        0 },
+    { X86::VPSUBDZ128rrk,      X86::VPSUBDZ128rmk,        0 },
+    { X86::VPSUBQZ128rrk,      X86::VPSUBQZ128rmk,        0 },
+    { X86::VPSUBSBZ128rrk,     X86::VPSUBSBZ128rmk,       0 },
+    { X86::VPSUBSWZ128rrk,     X86::VPSUBSWZ128rmk,       0 },
+    { X86::VPSUBUSBZ128rrk,    X86::VPSUBUSBZ128rmk,      0 },
+    { X86::VPSUBUSWZ128rrk,    X86::VPSUBUSWZ128rmk,      0 },
+    { X86::VPSUBWZ128rrk,      X86::VPSUBWZ128rmk,        0 },
+    { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik,   0 },
+    { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik,   0 },
+    { X86::VPUNPCKHBWZ128rrk,  X86::VPUNPCKHBWZ128rmk,    0 },
+    { X86::VPUNPCKHDQZ128rrk,  X86::VPUNPCKHDQZ128rmk,    0 },
+    { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk,   0 },
+    { X86::VPUNPCKHWDZ128rrk,  X86::VPUNPCKHWDZ128rmk,    0 },
+    { X86::VPUNPCKLBWZ128rrk,  X86::VPUNPCKLBWZ128rmk,    0 },
+    { X86::VPUNPCKLDQZ128rrk,  X86::VPUNPCKLDQZ128rmk,    0 },
+    { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk,   0 },
+    { X86::VPUNPCKLWDZ128rrk,  X86::VPUNPCKLWDZ128rmk,    0 },
+    { X86::VPXORDZ128rrk,      X86::VPXORDZ128rmk,        0 },
+    { X86::VPXORQZ128rrk,      X86::VPXORQZ128rmk,        0 },
+    { X86::VSHUFPDZ128rrik,    X86::VSHUFPDZ128rmik,      0 },
+    { X86::VSHUFPSZ128rrik,    X86::VSHUFPSZ128rmik,      0 },
+    { X86::VSUBPDZ128rrk,      X86::VSUBPDZ128rmk,        0 },
+    { X86::VSUBPSZ128rrk,      X86::VSUBPSZ128rmk,        0 },
+    { X86::VUNPCKHPDZ128rrk,   X86::VUNPCKHPDZ128rmk,     0 },
+    { X86::VUNPCKHPSZ128rrk,   X86::VUNPCKHPSZ128rmk,     0 },
+    { X86::VUNPCKLPDZ128rrk,   X86::VUNPCKLPDZ128rmk,     0 },
+    { X86::VUNPCKLPSZ128rrk,   X86::VUNPCKLPSZ128rmk,     0 },
+    { X86::VXORPDZ128rrk,      X86::VXORPDZ128rmk,        0 },
+    { X86::VXORPSZ128rrk,      X86::VXORPSZ128rmk,        0 },
+
+    // 512-bit three source instructions with zero masking.
+    { X86::VPERMI2Brrkz,       X86::VPERMI2Brmkz,         0 },
+    { X86::VPERMI2Drrkz,       X86::VPERMI2Drmkz,         0 },
+    { X86::VPERMI2PSrrkz,      X86::VPERMI2PSrmkz,        0 },
+    { X86::VPERMI2PDrrkz,      X86::VPERMI2PDrmkz,        0 },
+    { X86::VPERMI2Qrrkz,       X86::VPERMI2Qrmkz,         0 },
+    { X86::VPERMI2Wrrkz,       X86::VPERMI2Wrmkz,         0 },
+    { X86::VPERMT2Brrkz,       X86::VPERMT2Brmkz,         0 },
+    { X86::VPERMT2Drrkz,       X86::VPERMT2Drmkz,         0 },
+    { X86::VPERMT2PSrrkz,      X86::VPERMT2PSrmkz,        0 },
+    { X86::VPERMT2PDrrkz,      X86::VPERMT2PDrmkz,        0 },
+    { X86::VPERMT2Qrrkz,       X86::VPERMT2Qrmkz,         0 },
+    { X86::VPERMT2Wrrkz,       X86::VPERMT2Wrmkz,         0 },
+    { X86::VPTERNLOGDZrrikz,   X86::VPTERNLOGDZrmikz,     0 },
+    { X86::VPTERNLOGQZrrikz,   X86::VPTERNLOGQZrmikz,     0 },
+
+    // 256-bit three source instructions with zero masking.
+    { X86::VPERMI2B256rrkz,    X86::VPERMI2B256rmkz,      0 },
+    { X86::VPERMI2D256rrkz,    X86::VPERMI2D256rmkz,      0 },
+    { X86::VPERMI2PD256rrkz,   X86::VPERMI2PD256rmkz,     0 },
+    { X86::VPERMI2PS256rrkz,   X86::VPERMI2PS256rmkz,     0 },
+    { X86::VPERMI2Q256rrkz,    X86::VPERMI2Q256rmkz,      0 },
+    { X86::VPERMI2W256rrkz,    X86::VPERMI2W256rmkz,      0 },
+    { X86::VPERMT2B256rrkz,    X86::VPERMT2B256rmkz,      0 },
+    { X86::VPERMT2D256rrkz,    X86::VPERMT2D256rmkz,      0 },
+    { X86::VPERMT2PD256rrkz,   X86::VPERMT2PD256rmkz,     0 },
+    { X86::VPERMT2PS256rrkz,   X86::VPERMT2PS256rmkz,     0 },
+    { X86::VPERMT2Q256rrkz,    X86::VPERMT2Q256rmkz,      0 },
+    { X86::VPERMT2W256rrkz,    X86::VPERMT2W256rmkz,      0 },
+    { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz,  0 },
+    { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz,  0 },
+
+    // 128-bit three source instructions with zero masking.
+    { X86::VPERMI2B128rrkz,    X86::VPERMI2B128rmkz,      0 },
+    { X86::VPERMI2D128rrkz,    X86::VPERMI2D128rmkz,      0 },
+    { X86::VPERMI2PD128rrkz,   X86::VPERMI2PD128rmkz,     0 },
+    { X86::VPERMI2PS128rrkz,   X86::VPERMI2PS128rmkz,     0 },
+    { X86::VPERMI2Q128rrkz,    X86::VPERMI2Q128rmkz,      0 },
+    { X86::VPERMI2W128rrkz,    X86::VPERMI2W128rmkz,      0 },
+    { X86::VPERMT2B128rrkz,    X86::VPERMT2B128rmkz,      0 },
+    { X86::VPERMT2D128rrkz,    X86::VPERMT2D128rmkz,      0 },
+    { X86::VPERMT2PD128rrkz,   X86::VPERMT2PD128rmkz,     0 },
+    { X86::VPERMT2PS128rrkz,   X86::VPERMT2PS128rmkz,     0 },
+    { X86::VPERMT2Q128rrkz,    X86::VPERMT2Q128rmkz,      0 },
+    { X86::VPERMT2W128rrkz,    X86::VPERMT2W128rmkz,      0 },
+    { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz,  0 },
+    { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz,  0 },
+  };
 
   for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
     AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
@@ -163,6 +3545,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
                   // Index 4, folded load
                   Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
   }
+  for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) {
+    if (I.getGroup()->isKMasked()) {
+      // Intrinsics need to pass TB_NO_REVERSE.
+      if (I.getGroup()->isIntrinsic()) {
+        AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+                      I.getRegOpcode(), I.getMemOpcode(),
+                      TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE);
+      } else {
+        AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+                      I.getRegOpcode(), I.getMemOpcode(),
+                      TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD);
+      }
+    }
+  }
 }
 
 void
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a3e677209305..8490b972eb5c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -5183,14 +5183,14 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
-      Sched<[WriteFAdd]>;
+      Sched<[WriteFHAdd]>;
 
   def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
-        IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+        IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>;
 }
 multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag,
@@ -5200,14 +5200,14 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
-      Sched<[WriteFAdd]>;
+      Sched<[WriteFHAdd]>;
 
   def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))],
-        IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+        IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>;
 }
 
 let Predicates = [HasAVX] in {
@@ -5310,7 +5310,7 @@ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
 // SSSE3 - Packed Binary Operator Instructions
 //===---------------------------------------------------------------------===//
 
-let Sched = WriteVecALU in {
+let Sched = WritePHAdd in {
 def SSE_PHADDSUBD : OpndItins<
   IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
 >;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 598d88d8b9c3..33bc8e11a572 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,20 +12,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86AsmPrinter.h"
-#include "X86RegisterInfo.h"
-#include "X86ShuffleDecodeConstantPool.h"
 #include "InstPrinter/X86ATTInstPrinter.h"
 #include "InstPrinter/X86InstComments.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "Utils/X86ShuffleDecode.h"
+#include "X86AsmPrinter.h"
+#include "X86RegisterInfo.h"
+#include "X86ShuffleDecodeConstantPool.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GlobalValue.h"
@@ -38,13 +39,12 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 
 using namespace llvm;
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index aabbf67a16b6..e6756b975c10 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -27,8 +27,8 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 677e82459766..03c8ccb53afe 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -1488,6 +1488,39 @@ def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;
 
 //-- Arithmetic instructions --//
 
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes<WriteFHAdd, [HWPort1, HWPort5]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+}
+
+// x,m / v,v,m.
+def : WriteRes<WriteFHAddLd, [HWPort1, HWPort5, HWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 2, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes<WritePHAdd, [HWPort1, HWPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+}
+// v <- v,m.
+def : WriteRes<WritePHAddLd, [HWPort1, HWPort5, HWPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2, 1];
+}
+
 // PHADD|PHSUB (S) W/D.
 // v <- v,v.
 def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index eca65c2892b7..b8ec5883152c 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -157,6 +157,31 @@ def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> {
   let ResourceCycles = [1, 1, 1, 1];
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes<WriteFHAdd, [SBPort1]> {
+  let Latency = 3;
+}
+
+// x,m / v,v,m.
+def : WriteRes<WriteFHAddLd, [SBPort1, SBPort23]> {
+  let Latency = 7;
+  let ResourceCycles = [1, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes<WritePHAdd, [SBPort15]>;
+
+// v <- v,m.
+def : WriteRes<WritePHAddLd, [SBPort15, SBPort23]> {
+  let Latency = 5;
+  let ResourceCycles = [1, 1];
+}
+
 // String instructions.
 // Packed Compare Implicit Length Strings, Return Mask
 def : WriteRes<WritePCmpIStrM, [SBPort015]> {
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 4eae6ca7abe3..a12fa68faf4f 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -77,6 +77,10 @@ defm WriteFVarBlend  : X86SchedWritePair; // Fp vector variable blends.
 // FMA Scheduling helper class.
 class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
 
+// Horizontal Add/Sub (float and integer)
+defm WriteFHAdd  : X86SchedWritePair;
+defm WritePHAdd : X86SchedWritePair;
+
 // Vector integer operations.
 defm WriteVecALU   : X86SchedWritePair; // Vector integer ALU op, no logicals.
 defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index ce1ece34e431..6cb2a3694d92 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -319,6 +319,38 @@ def : WriteRes<WriteAESKeyGenLd, [JLAGU, JVIMUL]> {
   let ResourceCycles = [1, 1];
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteFHAdd, [JFPU0]> {
+  let Latency = 3;
+}
+
+def : WriteRes<WriteFHAddLd, [JLAGU, JFPU0]> {
+  let Latency = 8;
+}
+
+def : WriteRes<WritePHAdd, [JFPU01]> {
+  let ResourceCycles = [1];
+}
+def : WriteRes<WritePHAddLd, [JLAGU, JFPU01 ]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 1];
+}
+
+def WriteFHAddY: SchedWriteRes<[JFPU0]> {
+  let Latency = 3;
+  let ResourceCycles = [2];
+}
+def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>;
+
+def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Carry-less multiplication instructions.
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index f95d4fa04177..03ed2db2350d 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -137,6 +137,33 @@ defm : SMWriteResPair<WriteShuffle,  FPC_RSV0,  1>;
 defm : SMWriteResPair<WriteBlend,  FPC_RSV0,  1>;
 defm : SMWriteResPair<WriteMPSAD,  FPC_RSV0,  7>;
 
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+// HADD, HSUB PS/PD
+
+def : WriteRes<WriteFHAdd,  [FPC_RSV01]> {
+  let Latency = 3;
+  let ResourceCycles = [2];
+}
+
+def : WriteRes<WriteFHAddLd,  [FPC_RSV01, MEC_RSV]> {
+  let Latency = 6;
+  let ResourceCycles = [2, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+def : WriteRes<WritePHAdd,  [FPC_RSV01]> {
+  let Latency = 1;
+  let ResourceCycles = [1];
+}
+
+def : WriteRes<WritePHAddLd,  [FPC_RSV01, MEC_RSV]> {
+  let Latency = 4;
+  let ResourceCycles = [1, 1];
+}
+
 // String instructions.
 // Packed Compare Implicit Length Strings, Return Mask
 def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> {
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index d4b2392eb1f5..c67aa04aebea 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86InstrInfo.h"
+#include "X86SelectionDAGInfo.h"
 #include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
-#include "X86SelectionDAGInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 84ec98484f8e..e36a47506ba0 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/X86BaseInfo.h"
 #include "X86Subtarget.h"
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/Attributes.h"
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index cb21f1bd7706..278b57eb00b7 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -24,8 +24,8 @@
 #include "X86TargetObjectFile.h"
 #include "X86TargetTransformInfo.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 7f70829cb6c6..4fd95717478e 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -9,6 +9,8 @@
 
 #include "X86TargetObjectFile.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/MC/MCContext.h"
@@ -16,8 +18,6 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Target/TargetLowering.h"
 
 using namespace llvm;
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index fe94079fd869..11ba7025e1b7 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1383,6 +1383,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }
 
+unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
+
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                                       unsigned ScalarizationCostPassed) {
@@ -2176,6 +2178,17 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
   return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
 }
 
+bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                               TargetTransformInfo::LSRCost &C2) {
+    // X86-specific: the number of instructions has first priority.
+    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+                    C1.NumIVMuls, C1.NumBaseAdds,
+                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+                    C2.NumIVMuls, C2.NumBaseAdds,
+                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+}
+
 bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
   Type *ScalarTy = DataTy->getScalarType();
   int DataWidth = isa<PointerType>(ScalarTy) ?
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index 9bef9e80c395..09ce2c90498d 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -76,6 +76,8 @@ public:
   int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                 const SCEV *Ptr);
 
+  unsigned getAtomicMemIntrinsicMaxElementSize() const;
+
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                             ArrayRef<Type *> Tys, FastMathFlags FMF,
                             unsigned ScalarizationCostPassed = UINT_MAX);
@@ -99,6 +101,8 @@ public:
   int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                     Type *Ty);
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2);
   bool isLegalMaskedLoad(Type *DataType);
   bool isLegalMaskedStore(Type *DataType);
   bool isLegalMaskedGather(Type *DataType);
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 3ee14a0ff7b1..0c3b34341476 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -22,9 +22,9 @@
 #include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 5fc58d831319..dd27e7ca30aa 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
 #include "InstPrinter/XCoreInstPrinter.h"
 #include "MCTargetDesc/XCoreMCAsmInfo.h"
-#include "MCTargetDesc/XCoreMCTargetDesc.h"
 #include "XCoreTargetStreamer.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDwarf.h"
@@ -23,8 +23,8 @@
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index b35aa0b95821..8f7c8a82380a 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "XCore.h"
 #include "InstPrinter/XCoreInstPrinter.h"
+#include "XCore.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreMCInstLower.h"
 #include "XCoreSubtarget.h"
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 1a1cbd474888..cb23399995da 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -10,9 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "XCoreTargetMachine.h"
 #include "MCTargetDesc/XCoreMCTargetDesc.h"
 #include "XCore.h"
-#include "XCoreTargetMachine.h"
 #include "XCoreTargetObjectFile.h"
 #include "XCoreTargetTransformInfo.h"
 #include "llvm/ADT/Optional.h"
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 2b53f01a996d..a047b3c9d9fc 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,9 +15,9 @@
 #define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
 
 #include "XCoreSubtarget.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Target/TargetMachine.h"
 #include <memory>
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index ad8693fd325e..c60a262e719c 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -9,10 +9,10 @@
 
 #include "XCoreTargetObjectFile.h"
 #include "XCoreSubtarget.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
diff --git a/lib/ToolDrivers/llvm-lib/LLVMBuild.txt b/lib/ToolDrivers/llvm-lib/LLVMBuild.txt
index 799dc997c0bb..e4b32ec4af90 100644
--- a/lib/ToolDrivers/llvm-lib/LLVMBuild.txt
+++ b/lib/ToolDrivers/llvm-lib/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = LibDriver
 parent = Libraries
-required_libraries = Object Option Support
+required_libraries = BinaryFormat Object Option Support
diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 3bae3826d62e..797e4ffc2d45 100644
--- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -14,14 +14,15 @@
 
 #include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -143,11 +144,10 @@ int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) {
       });
       return 1;
     }
-    sys::fs::file_magic Magic =
-        sys::fs::identify_magic(MOrErr->Buf->getBuffer());
-    if (Magic != sys::fs::file_magic::coff_object &&
-        Magic != sys::fs::file_magic::bitcode &&
-        Magic != sys::fs::file_magic::windows_resource) {
+    llvm::file_magic Magic = llvm::identify_magic(MOrErr->Buf->getBuffer());
+    if (Magic != llvm::file_magic::coff_object &&
+        Magic != llvm::file_magic::bitcode &&
+        Magic != llvm::file_magic::windows_resource) {
       llvm::errs() << Arg->getValue()
                    << ": not a COFF object, bitcode or resource file\n";
       return 1;
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 626a891f65c6..173dc05f0584 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -22,8 +22,8 @@
 #include "CoroInternal.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Transforms/Scalar.h"
diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp
index 98c4b1740306..ecff88c88dcb 100644
--- a/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -17,9 +17,9 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
-#include "llvm/Pass.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "elim-avail-extern"
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 479fd182598a..d1147f7d844b 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -53,18 +53,18 @@ static void makeVisible(GlobalValue &GV, bool Delete) {
 }
 
 namespace {
-  /// @brief A pass to extract specific functions and their dependencies.
+  /// @brief A pass to extract specific global values and their dependencies.
   class GVExtractorPass : public ModulePass {
     SetVector<GlobalValue *> Named;
     bool deleteStuff;
   public:
     static char ID; // Pass identification, replacement for typeid
 
-    /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the
-    /// specified function. Otherwise, it deletes as much of the module as
-    /// possible, except for the function specified.
-    ///
-    explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+    /// If deleteS is true, this pass deletes the specified global values.
+    /// Otherwise, it deletes as much of the module as possible, except for the
+    /// global values specified.
+    explicit GVExtractorPass(std::vector<GlobalValue*> &GVs,
+                             bool deleteS = true)
       : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
 
     bool runOnModule(Module &M) override {
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 5cc29a493798..813a4b6e2831 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -14,7 +14,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
@@ -34,7 +33,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "functionattrs"
diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp
index 4705ebe265ae..e47d881d1127 100644
--- a/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/lib/Transforms/IPO/GlobalSplit.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/GlobalSplit.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Constants.h"
@@ -23,6 +22,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
 
 #include <set>
 
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 349807496dc2..f79b61037f1d 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -24,6 +23,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "ipconstprop"
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 89518f3c5fae..5bb305ca84d0 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -13,10 +13,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-c/Initialization.h"
 #include "llvm-c/Transforms/IPO.h"
-#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 2ef299d9a2f0..15d7515cc842 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 673d3af0ab52..c0dfeede05c5 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -519,6 +519,10 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
       Function *Caller = CS.getCaller();
       Function *Callee = CS.getCalledFunction();
 
+      // We can only inline direct calls to non-declarations.
+      if (!Callee || Callee->isDeclaration())
+        continue;
+
       // If this call site is dead and it is to a readonly function, we should
       // just delete the call instead of trying to inline it, regardless of
       // size.  This happens because IPSCCP propagates the result out of the
@@ -531,10 +535,6 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
         CS.getInstruction()->eraseFromParent();
         ++NumCallsDeleted;
       } else {
-        // We can only inline direct calls to non-declarations.
-        if (!Callee || Callee->isDeclaration())
-          continue;
-
         // If this call site was obtained by inlining another function, verify
         // that the include path for the function did not include the callee
         // itself.  If so, we'd be recursively inlining the same function,
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index f898c3b5a935..c74b0a35e296 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/IR/Dominators.h"
@@ -22,6 +21,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 7bec50d9d25f..90896d285f5a 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -235,7 +235,6 @@ class LowerTypeTestsModule {
   ModuleSummaryIndex *ExportSummary;
   const ModuleSummaryIndex *ImportSummary;
 
-  bool LinkerSubsectionsViaSymbols;
   Triple::ArchType Arch;
   Triple::OSType OS;
   Triple::ObjectFormatType ObjectFormat;
@@ -475,13 +474,9 @@ void LowerTypeTestsModule::allocateByteArrays() {
     // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
     // that the pc-relative displacement is folded into the lea instead of the
     // test instruction getting another displacement.
-    if (LinkerSubsectionsViaSymbols) {
-      BAI->ByteArray->replaceAllUsesWith(GEP);
-    } else {
-      GlobalAlias *Alias = GlobalAlias::create(
-          Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
-      BAI->ByteArray->replaceAllUsesWith(Alias);
-    }
+    GlobalAlias *Alias = GlobalAlias::create(
+        Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
+    BAI->ByteArray->replaceAllUsesWith(Alias);
     BAI->ByteArray->eraseFromParent();
   }
 
@@ -502,7 +497,7 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
     return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
   } else {
     Constant *ByteArray = TIL.TheByteArray;
-    if (!LinkerSubsectionsViaSymbols && AvoidReuse && !ImportSummary) {
+    if (AvoidReuse && !ImportSummary) {
       // Each use of the byte array uses a different alias. This makes the
       // backend less likely to reuse previously computed byte array addresses,
       // improving the security of the CFI mechanism based on this pass.
@@ -608,8 +603,25 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
   if (TIL.TheKind == TypeTestResolution::AllOnes)
     return OffsetInRange;
 
-  TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
-  IRBuilder<> ThenB(Term);
+  // See if the intrinsic is used in the following common pattern:
+  //   br(llvm.type.test(...), thenbb, elsebb)
+  // where nothing happens between the type test and the br.
+  // If so, create slightly simpler IR.
+  if (CI->hasOneUse())
+    if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin()))
+      if (CI->getNextNode() == Br) {
+        BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator());
+        BasicBlock *Else = Br->getSuccessor(1);
+        BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange);
+        NewBr->setMetadata(LLVMContext::MD_prof,
+                           Br->getMetadata(LLVMContext::MD_prof));
+        ReplaceInstWithInst(InitialBB->getTerminator(), NewBr);
+
+        IRBuilder<> ThenB(CI);
+        return createBitSetTest(ThenB, TIL, BitOffset);
+      }
+
+  IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
 
   // Now that we know that the offset is in range and aligned, load the
   // appropriate bit from the bitset.
@@ -680,17 +692,13 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
                                       ConstantInt::get(Int32Ty, I * 2)};
     Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
         NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
-    if (LinkerSubsectionsViaSymbols) {
-      GV->replaceAllUsesWith(CombinedGlobalElemPtr);
-    } else {
-      assert(GV->getType()->getAddressSpace() == 0);
-      GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
-                                                GV->getLinkage(), "",
-                                                CombinedGlobalElemPtr, &M);
-      GAlias->setVisibility(GV->getVisibility());
-      GAlias->takeName(GV);
-      GV->replaceAllUsesWith(GAlias);
-    }
+    assert(GV->getType()->getAddressSpace() == 0);
+    GlobalAlias *GAlias =
+        GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(),
+                            "", CombinedGlobalElemPtr, &M);
+    GAlias->setVisibility(GV->getVisibility());
+    GAlias->takeName(GV);
+    GV->replaceAllUsesWith(GAlias);
     GV->eraseFromParent();
   }
 }
@@ -1166,8 +1174,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
             ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
                                  ConstantInt::get(IntPtrTy, I)}),
         F->getType());
-    if (LinkerSubsectionsViaSymbols || F->isDeclarationForLinker()) {
-
+    if (F->isDeclarationForLinker()) {
       if (F->isWeakForLinker())
         replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr);
       else
@@ -1302,7 +1309,6 @@ LowerTypeTestsModule::LowerTypeTestsModule(
     : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
   assert(!(ExportSummary && ImportSummary));
   Triple TargetTriple(M.getTargetTriple());
-  LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
   Arch = TargetTriple.getArch();
   OS = TargetTriple.getOS();
   ObjectFormat = TargetTriple.getObjectFormat();
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index d9acb9b1a743..3fd59847a005 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -14,10 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/EHPersonalities.h"
@@ -28,6 +26,8 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index e755e2bd8f26..67bc8f5f6b7a 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -695,6 +695,13 @@ bool SampleProfileLoader::inlineHotFunctions(
           CallSite(I).isIndirectCall())
         for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
           auto CalleeFunctionName = FS->getName();
+          // If it is a recursive call, we do not inline it as it could bloat
+          // the code exponentially. There is a way to better handle this, e.g.
+          // clone the caller first, and inline the cloned caller if it is
+          // recursive. As llvm does not inline recursive calls, we will simply
+          // ignore it instead of handling it explicitly.
+          if (CalleeFunctionName == F.getName())
+            continue;
           const char *Reason = "Callee function not available";
           auto R = SymbolMap.find(CalleeFunctionName);
           if (R == SymbolMap.end())
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index fb64367eef91..de1b51e206ff 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -20,7 +20,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
@@ -30,6 +29,7 @@
 #include "llvm/IR/TypeFinder.h"
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 9dede4cedd1d..a7bcc7cc5532 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -318,6 +318,12 @@ void splitAndWriteThinLTOBitcode(
   ProfileSummaryInfo PSI(M);
   ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
 
+  // Mark the merged module as requiring full LTO. We still want an index for
+  // it though, so that it can participate in summary-based dead stripping.
+  MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+  ModuleSummaryIndex MergedMIndex =
+      buildModuleSummaryIndex(*MergedM, nullptr, &PSI);
+
   SmallVector<char, 0> Buffer;
 
   BitcodeWriter W(Buffer);
@@ -327,7 +333,8 @@ void splitAndWriteThinLTOBitcode(
   ModuleHash ModHash = {{0}};
   W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                 /*GenerateHash=*/true, &ModHash);
-  W.writeModule(MergedM.get());
+  W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
+                &MergedMIndex);
   W.writeStrtab();
   OS << Buffer;
 
@@ -340,7 +347,8 @@ void splitAndWriteThinLTOBitcode(
     StripDebugInfo(M);
     W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
                    /*GenerateHash=*/false, &ModHash);
-    W2.writeModule(MergedM.get());
+    W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false,
+                   &MergedMIndex);
     W2.writeStrtab();
     *ThinLinkOS << Buffer;
   }
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index aae22c5457ba..00769cd63229 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -46,9 +46,9 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7204bf517681..287a5167fe2a 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -991,8 +991,9 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add,
   // Shifts and add used to flip and mask off the low bit:
   // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
   const APInt *C3;
-  if (*C == 1 && match(Op0, m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)),
-                                            m_APInt(C3)))) &&
+  if (C->isOneValue() &&
+      match(Op0,
+            m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3)))) &&
       C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
     Value *NotX = Builder.CreateNot(X);
     return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
@@ -1008,8 +1009,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
-                                 I.hasNoUnsignedWrap(), SQ))
+  if (Value *V =
+          SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+                          SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
    // (A*B)+(A*C) -> A*(B+C) etc
@@ -1294,7 +1296,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (isa<Constant>(RHS))
@@ -1484,8 +1487,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
-                                 I.hasNoUnsignedWrap(), SQ))
+  if (Value *V =
+          SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+                          SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // (A*B)-(A*C) -> A*(B-C) etc
@@ -1554,7 +1558,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
 
     // -(X >>u 31) -> (X >>s 31)
     // -(X >>s 31) -> (X >>u 31)
-    if (*Op0C == 0) {
+    if (Op0C->isNullValue()) {
       Value *X;
       const APInt *ShAmt;
       if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
@@ -1690,7 +1694,8 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // fsub nsz 0, X ==> fsub nsz -0.0, X
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 1f8319efb3be..4fe3225a2172 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -172,12 +172,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
         const APInt& AddRHS = OpRHS->getValue();
 
         // Check to see if any bits below the one bit set in AndRHSV are set.
-        if ((AddRHS & (AndRHSV-1)) == 0) {
+        if ((AddRHS & (AndRHSV - 1)).isNullValue()) {
           // If not, the only thing that can effect the output of the AND is
           // the bit specified by AndRHSV.  If that bit is set, the effect of
           // the XOR is to toggle the bit.  If it is clear, then the ADD has
           // no effect.
-          if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop
+          if ((AddRHS & AndRHSV).isNullValue()) { // Bit is not set, noop
             TheAnd.setOperand(0, X);
             return &TheAnd;
           } else {
@@ -641,7 +641,7 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
     // If there is a conflict, we should actually return a false for the
     // whole construct.
     if (((BCst->getValue() & DCst->getValue()) &
-         (CCst->getValue() ^ ECst->getValue())) != 0)
+         (CCst->getValue() ^ ECst->getValue())).getBoolValue())
       return ConstantInt::get(LHS->getType(), !IsAnd);
 
     Value *NewOr1 = Builder->CreateOr(B, D);
@@ -748,7 +748,7 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
 
   // Special case: get the ordering right when the values wrap around zero.
   // Ie, we assumed the constants were unsigned when swapping earlier.
-  if (*C1 == 0 && C2->isAllOnesValue())
+  if (C1->isNullValue() && C2->isAllOnesValue())
     std::swap(C1, C2);
 
   if (*C1 == *C2 - 1) {
@@ -840,7 +840,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
 
       // Check that the low bits are zero.
       APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
-      if ((Low & AndC->getValue()) == 0 && (Low & BigC->getValue()) == 0) {
+      if ((Low & AndC->getValue()).isNullValue() &&
+          (Low & BigC->getValue()).isNullValue()) {
         Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue());
         APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue();
         Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N);
@@ -1234,7 +1235,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyAndInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyAndInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // See if we can simplify any instructions used by the instruction whose sole
@@ -1286,7 +1287,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       }
       case Instruction::Sub:
         // -x & 1 -> x & 1
-        if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
+        if (AndRHSMask.isOneValue() && match(Op0LHS, m_Zero()))
           return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
 
         break;
@@ -1295,7 +1296,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       case Instruction::LShr:
         // (1 << x) & 1 --> zext(x == 0)
         // (1 >> x) & 1 --> zext(x == 0)
-        if (AndRHSMask == 1 && Op0LHS == AndRHS) {
+        if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) {
           Value *NewICmp =
             Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));
           return new ZExtInst(NewICmp, I.getType());
@@ -1962,7 +1963,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyOrInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyOrInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // See if we can simplify any instructions used by the instruction whose sole
@@ -2033,7 +2034,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     ConstantInt *C1 = dyn_cast<ConstantInt>(C);
     ConstantInt *C2 = dyn_cast<ConstantInt>(D);
     if (C1 && C2) {  // (A & C1)|(B & C2)
-      if ((C1->getValue() & C2->getValue()) == 0) {
+      if ((C1->getValue() & C2->getValue()).isNullValue()) {
         // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
         // iff (C1&C2) == 0 and (N&~C1) == 0
         if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
@@ -2056,9 +2057,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
         ConstantInt *C3 = nullptr, *C4 = nullptr;
         if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
-            (C3->getValue() & ~C1->getValue()) == 0 &&
+            (C3->getValue() & ~C1->getValue()).isNullValue() &&
             match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
-            (C4->getValue() & ~C2->getValue()) == 0) {
+            (C4->getValue() & ~C2->getValue()).isNullValue()) {
           V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
           return BinaryOperator::CreateAnd(V2,
                                 Builder->getInt(C1->getValue()|C2->getValue()));
@@ -2344,7 +2345,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyXorInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *NewXor = foldXorToXor(I))
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b44499ec4be9..d29ed49eca0b 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -16,9 +16,9 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
@@ -393,7 +393,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
   unsigned BitWidth = SVT->getPrimitiveSizeInBits();
 
   // If shift-by-zero then just return the original value.
-  if (Count == 0)
+  if (Count.isNullValue())
     return Vec;
 
   // Handle cases when Shift >= BitWidth.
@@ -1373,10 +1373,6 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
           II.getIntrinsicID() == Intrinsic::ctlz) &&
          "Expected cttz or ctlz intrinsic");
   Value *Op0 = II.getArgOperand(0);
-  // FIXME: Try to simplify vectors of integers.
-  auto *IT = dyn_cast<IntegerType>(Op0->getType());
-  if (!IT)
-    return nullptr;
 
   KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
 
@@ -1392,14 +1388,14 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
   // FIXME: This should be in InstSimplify because we're replacing an
   // instruction with a constant.
   if (PossibleZeros == DefiniteZeros) {
-    auto *C = ConstantInt::get(IT, DefiniteZeros);
+    auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
     return IC.replaceInstUsesWith(II, C);
   }
 
   // If the input to cttz/ctlz is known to be non-zero,
   // then change the 'ZeroIsUndef' parameter to 'true'
   // because we know the zero behavior can't affect the result.
-  if (Known.One != 0 ||
+  if (!Known.One.isNullValue() ||
       isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
                      &IC.getDominatorTree())) {
     if (!match(II.getArgOperand(1), m_One())) {
@@ -1818,8 +1814,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
 /// lifting.
 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   auto Args = CI.arg_operands();
-  if (Value *V =
-          SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ))
+  if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
+                              Args.end(), SQ.getWithInstruction(&CI)))
     return replaceInstUsesWith(CI, V);
 
   if (isFreeCall(&CI, &TLI))
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 766939c56dff..38e95fb11639 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -661,7 +661,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
 
     // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
     // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
-    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) ||
         (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
       if (!DoTransform) return ICI;
 
@@ -688,7 +688,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
     // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
     // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
     // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-    if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+    if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) &&
         // This only works for EQ and NE
         ICI->isEquality()) {
       // If Op1C some other power of two, convert:
@@ -699,7 +699,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
         if (!DoTransform) return ICI;
 
         bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
-        if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+        if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) {
           // (X&4) == 2 --> false
           // (X&4) != 2 --> true
           Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
@@ -717,7 +717,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
                                    In->getName() + ".lobit");
         }
 
-        if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+        if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit.
           Constant *One = ConstantInt::get(In->getType(), 1);
           In = Builder->CreateXor(In, One);
         }
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c0798e164c39..1ef4acfb058c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -127,7 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
   switch (Pred) {
   case ICmpInst::ICMP_SLT:   // True if LHS s< 0
     TrueIfSigned = true;
-    return RHS == 0;
+    return RHS.isNullValue();
   case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
     TrueIfSigned = true;
     return RHS.isAllOnesValue();
@@ -155,10 +155,10 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
   if (!ICmpInst::isSigned(Pred))
     return false;
 
-  if (C == 0)
+  if (C.isNullValue())
     return ICmpInst::isRelational(Pred);
 
-  if (C == 1) {
+  if (C.isOneValue()) {
     if (Pred == ICmpInst::ICMP_SLT) {
       Pred = ICmpInst::ICMP_SLE;
       return true;
@@ -1193,7 +1193,7 @@ Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
   };
 
   // Don't bother doing any work for cases which InstSimplify handles.
-  if (AP2 == 0)
+  if (AP2.isNullValue())
     return nullptr;
 
   bool IsAShr = isa<AShrOperator>(I.getOperand(0));
@@ -1252,7 +1252,7 @@ Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
   };
 
   // Don't bother doing any work for cases which InstSimplify handles.
-  if (AP2 == 0)
+  if (AP2.isNullValue())
     return nullptr;
 
   unsigned AP2TrailingZeros = AP2.countTrailingZeros();
@@ -1399,7 +1399,7 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
   }
 
   // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
-  if (*C == 0 && Pred == ICmpInst::ICMP_SGT) {
+  if (C->isNullValue() && Pred == ICmpInst::ICMP_SGT) {
     SelectPatternResult SPR = matchSelectPattern(X, A, B);
     if (SPR.Flavor == SPF_SMIN) {
       if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
@@ -1465,7 +1465,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
                                                  const APInt *C) {
   ICmpInst::Predicate Pred = Cmp.getPredicate();
   Value *X = Trunc->getOperand(0);
-  if (*C == 1 && C->getBitWidth() > 1) {
+  if (C->isOneValue() && C->getBitWidth() > 1) {
     // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
     Value *V = nullptr;
     if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
@@ -1505,7 +1505,7 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
   // If this is a comparison that tests the signbit (X < 0) or (x > -1),
   // fold the xor.
   ICmpInst::Predicate Pred = Cmp.getPredicate();
-  if ((Pred == ICmpInst::ICMP_SLT && *C == 0) ||
+  if ((Pred == ICmpInst::ICMP_SLT && C->isNullValue()) ||
       (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue())) {
 
     // If the sign bit of the XorCst is not set, there is no change to
@@ -1623,7 +1623,7 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
   // Turn ((X >> Y) & C2) == 0  into  (X & (C2 << Y)) == 0.  The latter is
   // preferable because it allows the C2 << Y expression to be hoisted out of a
   // loop if Y is invariant and X is not.
-  if (Shift->hasOneUse() && *C1 == 0 && Cmp.isEquality() &&
+  if (Shift->hasOneUse() && C1->isNullValue() && Cmp.isEquality() &&
       !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
     // Compute C2 << Y.
     Value *NewShift =
@@ -1681,7 +1681,8 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
   // (icmp pred (and A, (or (shl 1, B), 1), 0))
   //
   // iff pred isn't signed
-  if (!Cmp.isSigned() && *C1 == 0 && match(And->getOperand(1), m_One())) {
+  if (!Cmp.isSigned() && C1->isNullValue() &&
+      match(And->getOperand(1), m_One())) {
     Constant *One = cast<Constant>(And->getOperand(1));
     Value *Or = And->getOperand(0);
     Value *A, *B, *LShr;
@@ -1764,7 +1765,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
   // (X & C2) != 0 -> (trunc X) <  0
   //   iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
   const APInt *C2;
-  if (And->hasOneUse() && *C == 0 && match(Y, m_APInt(C2))) {
+  if (And->hasOneUse() && C->isNullValue() && match(Y, m_APInt(C2))) {
     int32_t ExactLogBase2 = C2->exactLogBase2();
     if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
       Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
@@ -1784,7 +1785,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
 Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
                                               const APInt *C) {
   ICmpInst::Predicate Pred = Cmp.getPredicate();
-  if (*C == 1) {
+  if (C->isOneValue()) {
     // icmp slt signum(V) 1 --> icmp slt V, 1
     Value *V = nullptr;
     if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
@@ -1801,7 +1802,7 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
     return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
   }
 
-  if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse())
+  if (!Cmp.isEquality() || !C->isNullValue() || !Or->hasOneUse())
     return nullptr;
 
   Value *P, *Q;
@@ -2036,7 +2037,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
   // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
   Value *X = Shr->getOperand(0);
   CmpInst::Predicate Pred = Cmp.getPredicate();
-  if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && *C == 0)
+  if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
+      C->isNullValue())
     return new ICmpInst(Pred, X, Cmp.getOperand(1));
 
   const APInt *ShiftVal;
@@ -2127,7 +2129,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
   if (!match(UDiv->getOperand(0), m_APInt(C2)))
     return nullptr;
 
-  assert(C2 != 0 && "udiv 0, X should have been simplified already.");
+  assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
 
   // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
   Value *Y = UDiv->getOperand(1);
@@ -2140,7 +2142,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
 
   // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
   if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
-    assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
+    assert(*C != 0 && "icmp ult X, 0 should have been simplified already.");
     return new ICmpInst(ICmpInst::ICMP_UGT, Y,
                         ConstantInt::get(Y->getType(), C2->udiv(*C)));
   }
@@ -2178,7 +2180,8 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
   // INT_MIN will also fail if the divisor is 1. Although folds of all these
   // division-by-constant cases should be present, we can not assert that they
   // have happened before we reach this icmp instruction.
-  if (*C2 == 0 || *C2 == 1 || (DivIsSigned && C2->isAllOnesValue()))
+  if (C2->isNullValue() || C2->isOneValue() ||
+      (DivIsSigned && C2->isAllOnesValue()))
     return nullptr;
 
   // TODO: We could do all of the computations below using APInt.
@@ -2224,7 +2227,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
       HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
     }
   } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
-    if (*C == 0) {       // (X / pos) op 0
+    if (C->isNullValue()) {       // (X / pos) op 0
       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
       LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
       HiBound = RangeSize;
@@ -2245,7 +2248,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
   } else if (C2->isNegative()) { // Divisor is < 0.
     if (Div->isExact())
       RangeSize = ConstantExpr::getNeg(RangeSize);
-    if (*C == 0) {       // (X / neg) op 0
+    if (C->isNullValue()) { // (X / neg) op 0
       // e.g. X/-5 op 0  --> [-4, 5)
       LoBound = AddOne(RangeSize);
       HiBound = ConstantExpr::getNeg(RangeSize);
@@ -2337,15 +2340,15 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
       return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
 
     // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
-    if (Pred == ICmpInst::ICMP_SGT && *C == 0)
+    if (Pred == ICmpInst::ICMP_SGT && C->isNullValue())
       return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
 
     // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
-    if (Pred == ICmpInst::ICMP_SLT && *C == 0)
+    if (Pred == ICmpInst::ICMP_SLT && C->isNullValue())
       return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
 
     // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
-    if (Pred == ICmpInst::ICMP_SLT && *C == 1)
+    if (Pred == ICmpInst::ICMP_SLT && C->isOneValue())
       return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
   }
 
@@ -2520,7 +2523,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
   switch (BO->getOpcode()) {
   case Instruction::SRem:
     // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
-    if (*C == 0 && BO->hasOneUse()) {
+    if (C->isNullValue() && BO->hasOneUse()) {
       const APInt *BOC;
       if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
         Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName());
@@ -2537,7 +2540,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
         Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1));
         return new ICmpInst(Pred, BOp0, SubC);
       }
-    } else if (*C == 0) {
+    } else if (C->isNullValue()) {
       // Replace ((add A, B) != 0) with (A != -B) if A or B is
       // efficiently invertible, or if the add has just this one use.
       if (Value *NegVal = dyn_castNegVal(BOp1))
@@ -2558,7 +2561,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
         // For the xor case, we can xor two constants together, eliminating
         // the explicit xor.
         return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
-      } else if (*C == 0) {
+      } else if (C->isNullValue()) {
         // Replace ((xor A, B) != 0) with (A != B)
         return new ICmpInst(Pred, BOp0, BOp1);
       }
@@ -2571,7 +2574,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
         // Replace ((sub BOC, B) != C) with (B != BOC-C).
         Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS);
         return new ICmpInst(Pred, BOp1, SubC);
-      } else if (*C == 0) {
+      } else if (C->isNullValue()) {
         // Replace ((sub A, B) != 0) with (A != B).
         return new ICmpInst(Pred, BOp0, BOp1);
       }
@@ -2609,7 +2612,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
       }
 
       // ((X & ~7) == 0) --> X < 8
-      if (*C == 0 && (~(*BOC) + 1).isPowerOf2()) {
+      if (C->isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
         Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
         auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
         return new ICmpInst(NewPred, BOp0, NegBOC);
@@ -2618,9 +2621,9 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
     break;
   }
   case Instruction::Mul:
-    if (*C == 0 && BO->hasNoSignedWrap()) {
+    if (C->isNullValue() && BO->hasNoSignedWrap()) {
       const APInt *BOC;
-      if (match(BOp1, m_APInt(BOC)) && *BOC != 0) {
+      if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) {
         // The trivial case (mul X, 0) is handled by InstSimplify.
         // General case : (mul X, C) != 0 iff X != 0
         //                (mul X, C) == 0 iff X == 0
@@ -2629,7 +2632,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
     }
     break;
   case Instruction::UDiv:
-    if (*C == 0) {
+    if (C->isNullValue()) {
       // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
       auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
       return new ICmpInst(NewPred, BOp1, BOp0);
@@ -2668,7 +2671,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
   case Intrinsic::ctpop: {
     // popcount(A) == 0  ->  A == 0 and likewise for !=
     // popcount(A) == bitwidth(A)  ->  A == -1 and likewise for !=
-    bool IsZero = *C == 0;
+    bool IsZero = C->isNullValue();
     if (IsZero || *C == C->getBitWidth()) {
       Worklist.Add(II);
       Cmp.setOperand(0, II->getArgOperand(0));
@@ -3057,7 +3060,8 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
         break;
 
       const APInt *C;
-      if (match(BO0->getOperand(1), m_APInt(C)) && *C != 0 && *C != 1) {
+      if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
+          !C->isOneValue()) {
         // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
         // Mask = -1 >> count-trailing-zeros(C).
         if (unsigned TZs = C->countTrailingZeros()) {
@@ -4093,7 +4097,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
 
       // Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
       const APInt *CI;
-      if (Op0KnownZeroInverted == 1 &&
+      if (Op0KnownZeroInverted.isOneValue() &&
           match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
         // ((8 >>u X) & 1) == 0 -> X != 3
         // ((8 >>u X) & 1) != 0 -> X == 3
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 56f133de3de1..fd0a64a5bbb5 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
@@ -29,7 +30,6 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
 #include "llvm/Transforms/Utils/Local.h"
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 4d408359eeea..365c4ba75154 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -176,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyMulInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyMulInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -599,7 +599,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   if (isa<Constant>(Op0))
     std::swap(Op0, Op1);
 
-  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   bool AllowReassociate = I.hasUnsafeAlgebra();
@@ -930,7 +931,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
         }
       }
 
-      if (*C2 != 0) // avoid X udiv 0
+      if (!C2->isNullValue()) // avoid X udiv 0
         if (Instruction *FoldedDiv = foldOpWithConstantIntoOperand(I))
           return FoldedDiv;
     }
@@ -1103,7 +1104,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyUDivInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyUDivInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer div common cases
@@ -1176,7 +1177,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifySDivInst(Op0, Op1, SQ))
+  if (Value *V = SimplifySDivInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer div common cases
@@ -1288,7 +1289,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (isa<Constant>(Op0))
@@ -1472,7 +1474,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyURemInst(Op0, Op1, SQ))
+  if (Value *V = SimplifyURemInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *common = commonIRemTransforms(I))
@@ -1515,7 +1517,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifySRemInst(Op0, Op1, SQ))
+  if (Value *V = SimplifySRemInst(Op0, Op1, SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle the integer rem common cases
@@ -1588,7 +1590,8 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
   if (Value *V = SimplifyVectorOp(I))
     return replaceInstUsesWith(I, V);
 
-  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ))
+  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
+                                  SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   // Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 1117c11f4f51..5dbf1e85b05b 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -16,9 +16,9 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/DebugInfo.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
 // PHINode simplification
 //
 Instruction *InstCombiner::visitPHINode(PHINode &PN) {
-  if (Value *V = SimplifyInstruction(&PN, SQ))
+  if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN)))
     return replaceInstUsesWith(PN, V);
 
   if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7afb8814fe52..b9674d85634d 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1121,7 +1121,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *FalseVal = SI.getFalseValue();
   Type *SelType = SI.getType();
 
-  if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ))
+  if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal,
+                                    SQ.getWithInstruction(&SI)))
     return replaceInstUsesWith(SI, V);
 
   if (Instruction *I = canonicalizeSelectToShuffle(SI))
@@ -1478,9 +1479,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) {
     KnownBits Known(1);
     computeKnownBits(CondVal, Known, 0, &SI);
-    if (Known.One == 1)
+    if (Known.One.isOneValue())
       return replaceInstUsesWith(SI, TrueVal);
-    if (Known.Zero == 1)
+    if (Known.Zero.isOneValue())
       return replaceInstUsesWith(SI, FalseVal);
   }
 
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b40d067b2817..3f2ddcacce2b 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -520,8 +520,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(),
-                                 I.hasNoUnsignedWrap(), SQ))
+  if (Value *V =
+          SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
+                          SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *V = commonShiftTransforms(I))
@@ -619,7 +620,8 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ))
+  if (Value *V =
+          SimplifyLShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *R = commonShiftTransforms(I))
@@ -680,6 +682,25 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
     }
 
+    if (match(Op0, m_SExt(m_Value(X)))) {
+      // Are we moving the sign bit to the low bit and widening with high zeros?
+      unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
+      if (ShAmt == BitWidth - 1 &&
+          (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
+        // lshr (sext i1 X to iN), N-1 --> zext X to iN
+        if (SrcTyBitWidth == 1)
+          return new ZExtInst(X, Ty);
+
+        // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
+        if (Op0->hasOneUse()) {
+          Value *NewLShr = Builder->CreateLShr(X, SrcTyBitWidth - 1);
+          return new ZExtInst(NewLShr, Ty);
+        }
+      }
+
+      // TODO: Convert to ashr+zext if the shift equals the extension amount.
+    }
+
     if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
       unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
       // Oversized shifts are simplified to zero in InstSimplify.
@@ -703,7 +724,8 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ))
+  if (Value *V =
+          SimplifyAShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
   if (Instruction *R = commonShiftTransforms(I))
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 5df55f01b83f..03841164b58d 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -121,7 +121,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   }
 
   Known.resetAll();
-  if (DemandedMask == 0)     // Not demanding any bits from V.
+  if (DemandedMask.isNullValue())     // Not demanding any bits from V.
     return UndefValue::get(VTy);
 
   if (Depth == 6)        // Limit search depth.
@@ -488,7 +488,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // always convert this into a logical shr, even if the shift amount is
     // variable.  The low bit of the shift cannot be an input sign bit unless
     // the shift amount is >= the size of the datatype, which is undefined.
-    if (DemandedMask == 1) {
+    if (DemandedMask.isOneValue()) {
       // Perform the logical shift right.
       Instruction *NewVal = BinaryOperator::CreateLShr(
                         I->getOperand(0), I->getOperand(1), I->getName());
@@ -656,7 +656,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // If we don't need any of low bits then return zero,
         // we know that DemandedMask is non-zero already.
         APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
-        if (DemandedElts == 0)
+        if (DemandedElts.isNullValue())
           return ConstantInt::getNullValue(VTy);
 
         // We know that the upper bits are set to zero.
@@ -908,7 +908,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     return nullptr;
   }
 
-  if (DemandedElts == 0) { // If nothing is demanded, provide undef.
+  if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef.
     UndefElts = EltMask;
     return UndefValue::get(V->getType());
   }
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 7fc6774f1849..926e46655eb8 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -145,7 +145,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
 
 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(),
-                                            EI.getIndexOperand(), SQ))
+                                            EI.getIndexOperand(),
+                                            SQ.getWithInstruction(&EI)))
     return replaceInstUsesWith(EI, V);
 
   // If vector val is constant with all elements the same, replace EI with
@@ -440,7 +441,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
     if (!OldExt || OldExt->getParent() != WideVec->getParent())
       continue;
     auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
-    NewExt->insertAfter(WideVec);
+    NewExt->insertAfter(OldExt);
     IC.replaceInstUsesWith(*OldExt, NewExt);
   }
 }
@@ -1140,8 +1141,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   SmallVector<int, 16> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
 
-  if (auto *V =
-          SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ))
+  if (auto *V = SimplifyShuffleVectorInst(
+          LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
     return replaceInstUsesWith(SVI, V);
 
   bool MadeChange = false;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 2730afc5c5b9..65e6d2e35905 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,7 +33,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "InstCombineInternal.h"
 #include "llvm-c/Initialization.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -62,6 +61,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
@@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = I.getOperand(1);
 
         // Does "B op C" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) {
+        if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
           // It simplifies to V.  Form "A op V".
           I.setOperand(0, A);
           I.setOperand(1, V);
@@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = Op1->getOperand(1);
 
         // Does "A op B" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) {
+        if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
           // It simplifies to V.  Form "V op C".
           I.setOperand(0, V);
           I.setOperand(1, C);
@@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = I.getOperand(1);
 
         // Does "C op A" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {
+        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
           // It simplifies to V.  Form "V op B".
           I.setOperand(0, V);
           I.setOperand(1, B);
@@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Value *C = Op1->getOperand(1);
 
         // Does "C op A" simplify?
-        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {
+        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
           // It simplifies to V.  Form "B op V".
           I.setOperand(0, B);
           I.setOperand(1, V);
@@ -521,7 +521,7 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
         std::swap(C, D);
       // Consider forming "A op' (B op D)".
       // If "B op D" simplifies then it can be formed with no cost.
-      V = SimplifyBinOp(TopLevelOpcode, B, D, SQ);
+      V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
       // If "B op D" doesn't simplify then only go on if both of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
       if (!V && LHS->hasOneUse() && RHS->hasOneUse())
@@ -540,7 +540,7 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder,
         std::swap(C, D);
       // Consider forming "(A op C) op' B".
       // If "A op C" simplifies then it can be formed with no cost.
-      V = SimplifyBinOp(TopLevelOpcode, A, C, SQ);
+      V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
 
       // If "A op C" doesn't simplify then only go on if both of the existing
       // operations "A op' B" and "C op' D" will be zapped as no longer used.
@@ -638,8 +638,10 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
 
     // Do "A op C" and "B op C" both simplify?
-    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ))
-      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) {
+    if (Value *L =
+            SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)))
+      if (Value *R =
+              SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) {
         // They do! Return "L op' R".
         ++NumExpand;
         C = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -655,8 +657,10 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
 
     // Do "A op B" and "A op C" both simplify?
-    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ))
-      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) {
+    if (Value *L =
+            SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I)))
+      if (Value *R =
+              SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) {
         // They do! Return "L op' R".
         ++NumExpand;
         A = Builder->CreateBinOp(InnerOpcode, L, R);
@@ -671,15 +675,17 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     if (auto *SI1 = dyn_cast<SelectInst>(RHS)) {
       if (SI0->getCondition() == SI1->getCondition()) {
         Value *SI = nullptr;
-        if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
-                                     SI1->getFalseValue(), SQ))
+        if (Value *V =
+                SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
+                              SI1->getFalseValue(), SQ.getWithInstruction(&I)))
           SI = Builder->CreateSelect(SI0->getCondition(),
                                      Builder->CreateBinOp(TopLevelOpcode,
                                                           SI0->getTrueValue(),
                                                           SI1->getTrueValue()),
                                      V);
-        if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
-                                     SI1->getTrueValue(), SQ))
+        if (Value *V =
+                SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
+                              SI1->getTrueValue(), SQ.getWithInstruction(&I)))
           SI = Builder->CreateSelect(
               SI0->getCondition(), V,
               Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
@@ -1399,7 +1405,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
 
-  if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ))
+  if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops,
+                                 SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
   Value *PtrOp = GEP.getOperand(0);
@@ -1588,7 +1595,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (SO1->getType() != GO1->getType())
         return nullptr;
 
-      Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ);
+      Value *Sum =
+          SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
       // Only do the combine when we are sure the cost after the
       // merge is never more than that before the merge.
       if (Sum == nullptr)
@@ -2283,7 +2291,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
   if (!EV.hasIndices())
     return replaceInstUsesWith(EV, Agg);
 
-  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ))
+  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(),
+                                          SQ.getWithInstruction(&EV)))
     return replaceInstUsesWith(EV, V);
 
   if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index d4c8369fa9d3..a193efe902cf 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetFolder.h"
@@ -25,6 +24,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "bounds-checking"
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index e2e3cbdbc295..a33490f6e4ac 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -44,15 +44,14 @@
 /// For more information, please refer to the design document:
 /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
 
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Dominators.h"
 #include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/InstVisitor.h"
@@ -63,6 +62,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
index e89384c559fe..6864d295525c 100644
--- a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -18,7 +18,6 @@
 // The rest is handled by the run-time library.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -32,6 +31,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 9a82532d7703..f83c930ca61b 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -28,10 +28,10 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 9260217bd5e6..a991792bf5a3 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -19,7 +19,6 @@
 // The rest is handled by the run-time library.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -42,6 +41,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/EscapeEnumerator.h"
 #include "llvm/Transforms/Utils/Local.h"
diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h
index ef075bdccbfe..9c5cf6f5f5ab 100644
--- a/lib/Transforms/ObjCARC/BlotMapVector.h
+++ b/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
-#include <vector>
 #include <algorithm>
+#include <vector>
 
 namespace llvm {
 /// \brief An associative container with fast insertion-order (deterministic)
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 9d78e5ae3b9b..464805051c65 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -20,8 +20,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "ObjCARC.h"
 #include "DependencyAnalysis.h"
+#include "ObjCARC.h"
 #include "ProvenanceAnalysis.h"
 #include "llvm/IR/CFG.h"
 
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index a86eaaec7641..e70e7591f6a7 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -26,9 +26,9 @@
 // TODO: ObjCARCContract could insert PHI nodes when uses aren't
 // dominated by single calls.
 
-#include "ObjCARC.h"
 #include "ARCRuntimeEntryPoints.h"
 #include "DependencyAnalysis.h"
+#include "ObjCARC.h"
 #include "ProvenanceAnalysis.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Dominators.h"
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 3c73376c9906..8f3a33f66c7f 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -24,10 +24,10 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "ObjCARC.h"
 #include "ARCRuntimeEntryPoints.h"
 #include "BlotMapVector.h"
 #include "DependencyAnalysis.h"
+#include "ObjCARC.h"
 #include "ProvenanceAnalysis.h"
 #include "PtrState.h"
 #include "llvm/ADT/DenseMap.h"
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 9ffdfb4f7f9c..62fc52f6d091 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -22,8 +22,8 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "ObjCARC.h"
 #include "ProvenanceAnalysis.h"
+#include "ObjCARC.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index c274e8182fb5..870a5f600fd8 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -8,13 +8,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "ProvenanceAnalysis.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h
index 9749e44822b2..87298fa59bfd 100644
--- a/lib/Transforms/ObjCARC/PtrState.h
+++ b/lib/Transforms/ObjCARC/PtrState.h
@@ -21,8 +21,8 @@
 #include "llvm/Analysis/ObjCARCInstKind.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 namespace objcarc {
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index fd931c521c8f..99480f12da9e 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -19,12 +19,11 @@
 #define AA_NAME "alignment-from-assumptions"
 #define DEBUG_TYPE AA_NAME
 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -35,6 +34,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 STATISTIC(NumLoadAlignChanged,
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 9e982194bac7..4fa27891a974 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,15 +18,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 07a0ba9b1222..fa4806e884c3 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -19,10 +19,10 @@
 #include "llvm/Transforms/Scalar/DCE.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 185cdbdda378..063df779a30b 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index b7514a6d5793..29de792bd248 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -41,7 +41,6 @@
 //   ret void
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -50,6 +49,7 @@
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 5c75f39e381d..8634816e702f 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -169,8 +169,8 @@ struct SinkingInstructionCandidate {
             NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
            - SplitEdgeCost;
   }
-  bool operator>=(const SinkingInstructionCandidate &Other) const {
-    return Cost >= Other.Cost;
+  bool operator>(const SinkingInstructionCandidate &Other) const {
+    return Cost > Other.Cost;
   }
 };
 
@@ -745,7 +745,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
   std::stable_sort(
       Candidates.begin(), Candidates.end(),
       [](const SinkingInstructionCandidate &A,
-         const SinkingInstructionCandidate &B) { return A >= B; });
+         const SinkingInstructionCandidate &B) { return A > B; });
   DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
                                                     : Candidates) dbgs()
                                                << "  " << C << "\n";);
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index 65a2cd955672..fb7c6e15758d 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -40,7 +40,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/GuardWidening.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -50,6 +49,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Scalar.h"
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9a7882211bac..10782963177c 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -86,6 +86,10 @@ static cl::opt<bool> UsePostIncrementRanges(
   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
   cl::init(true));
 
+static cl::opt<bool>
+DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
+            cl::desc("Disable Linear Function Test Replace optimization"));
+
 namespace {
 struct RewritePhi;
 
@@ -2413,7 +2417,8 @@ bool IndVarSimplify::run(Loop *L) {
 
   // If we have a trip count expression, rewrite the loop's exit condition
   // using it.  We can currently only handle loops with a single exit.
-  if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {
+  if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
+      needsLFTR(L, DT)) {
     PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
     if (IndVar) {
       // Check preconditions for proper SCEVExpander operation. SCEV does not
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index e21b0feb7c5a..2f96c3064b86 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -59,8 +59,8 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 
 using namespace llvm;
 
@@ -1371,28 +1371,35 @@ bool LoopConstrainer::run() {
 
   DT.recalculate(F);
 
+  // We need to first add all the pre and post loop blocks into the loop
+  // structures (as part of createClonedLoopStructure), and then update the
+  // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating
+  // LI when LoopSimplifyForm is generated.
+  Loop *PreL = nullptr, *PostL = nullptr;
   if (!PreLoop.Blocks.empty()) {
-    auto *L = createClonedLoopStructure(
+    PreL = createClonedLoopStructure(
         &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map);
-    formLCSSARecursively(*L, DT, &LI, &SE);
-    simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
-    // Pre loops are slow paths, we do not need to perform any loop
-    // optimizations on them.
-    DisableAllLoopOptsOnLoop(*L);
   }
 
   if (!PostLoop.Blocks.empty()) {
-    auto *L = createClonedLoopStructure(
+    PostL = createClonedLoopStructure(
         &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map);
-    formLCSSARecursively(*L, DT, &LI, &SE);
-    simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
-    // Post loops are slow paths, we do not need to perform any loop
-    // optimizations on them.
-    DisableAllLoopOptsOnLoop(*L);
   }
 
-  formLCSSARecursively(OriginalLoop, DT, &LI, &SE);
-  simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true);
+  // This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
+  auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
+    formLCSSARecursively(*L, DT, &LI, &SE);
+    simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+    // Pre/post loops are slow paths, we do not need to perform any loop
+    // optimizations on them.
+    if (!IsOriginalLoop)
+      DisableAllLoopOptsOnLoop(*L);
+  };
+  if (PreL)
+    CanonicalizeLoop(PreL, false);
+  if (PostL)
+    CanonicalizeLoop(PostL, false);
+  CanonicalizeLoop(&OriginalLoop, true);
 
   return true;
 }
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 5e116ef2fe75..3c8fbd35bf8c 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -89,7 +89,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
@@ -100,6 +99,7 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
@@ -500,6 +500,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
   }
 
   // Computes the operands of the new constant expression.
+  bool IsNew = false;
   SmallVector<Constant *, 4> NewOperands;
   for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
     Constant *Operand = CE->getOperand(Index);
@@ -509,6 +510,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
     // bitcast, and getelementptr) do not incur cycles in the data flow graph
     // and (2) this function is called on constant expressions in postorder.
     if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
+      IsNew = true;
       NewOperands.push_back(cast<Constant>(NewOperand));
     } else {
       // Otherwise, reuses the old operand.
@@ -516,6 +518,11 @@ static Value *cloneConstantExprWithNewAddressSpace(
     }
   }
 
+  // If !IsNew, we will replace the Value with itself. However, replaced values
+  // are assumed to be wrapped in an addrspace cast later, so drop it now.
+  if (!IsNew)
+    return nullptr;
+
   if (CE->getOpcode() == Instruction::GetElementPtr) {
     // Needs to specify the source type while constructing a getelementptr
     // constant expression.
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 2ef8f8563bb9..c120036464d0 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -12,16 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/JumpThreading.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -36,6 +35,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -132,7 +132,7 @@ bool JumpThreading::runOnFunction(Function &F) {
   bool HasProfileData = F.getEntryCount().hasValue();
   if (HasProfileData) {
     LoopInfo LI{DominatorTree(F)};
-    BPI.reset(new BranchProbabilityInfo(F, LI));
+    BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
 
@@ -152,7 +152,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
   bool HasProfileData = F.getEntryCount().hasValue();
   if (HasProfileData) {
     LoopInfo LI{DominatorTree(F)};
-    BPI.reset(new BranchProbabilityInfo(F, LI));
+    BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
     BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
   }
 
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 494cbc61bc9c..025ba1bfedc1 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -11,7 +11,6 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -28,6 +27,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c6a05ecbd0b1..b706152f30c8 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -116,6 +116,7 @@ private:
     Memset,
     MemsetPattern,
     Memcpy,
+    UnorderedAtomicMemcpy,
     DontUse // Dummy retval never to be used. Allows catching errors in retval
             // handling.
   };
@@ -353,8 +354,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
 
 LoopIdiomRecognize::LegalStoreKind
 LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
+
   // Don't touch volatile stores.
-  if (!SI->isSimple())
+  if (SI->isVolatile())
+    return LegalStoreKind::None;
+  // We only want simple or unordered-atomic stores.
+  if (!SI->isUnordered())
     return LegalStoreKind::None;
 
   // Don't convert stores of non-integral pointer types to memsets (which stores
@@ -395,15 +400,18 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
   Value *SplatValue = isBytewiseValue(StoredVal);
   Constant *PatternValue = nullptr;
 
+  // Note: memset and memset_pattern on unordered-atomic are not yet supported
+  bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();
+
   // If we're allowed to form a memset, and the stored value would be
   // acceptable for memset, use it.
-  if (HasMemset && SplatValue &&
+  if (!UnorderedAtomic && HasMemset && SplatValue &&
       // Verify that the stored value is loop invariant.  If not, we can't
       // promote the memset.
       CurLoop->isLoopInvariant(SplatValue)) {
     // It looks like we can use SplatValue.
     return LegalStoreKind::Memset;
-  } else if (HasMemsetPattern &&
+  } else if (!UnorderedAtomic && HasMemsetPattern &&
              // Don't create memset_pattern16s with address spaces.
              StorePtr->getType()->getPointerAddressSpace() == 0 &&
              (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
@@ -422,7 +430,12 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
 
     // The store must be feeding a non-volatile load.
     LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
-    if (!LI || !LI->isSimple())
+
+    // Only allow non-volatile loads
+    if (!LI || LI->isVolatile())
+      return LegalStoreKind::None;
+    // Only allow simple or unordered-atomic loads
+    if (!LI->isUnordered())
       return LegalStoreKind::None;
 
     // See if the pointer expression is an AddRec like {base,+,1} on the current
@@ -438,7 +451,9 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
       return LegalStoreKind::None;
 
     // Success.  This store can be converted into a memcpy.
-    return LegalStoreKind::Memcpy;
+    UnorderedAtomic = UnorderedAtomic || LI->isAtomic();
+    return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy
+                           : LegalStoreKind::Memcpy;
   }
   // This store can't be transformed into a memset/memcpy.
   return LegalStoreKind::None;
@@ -469,6 +484,7 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
       StoreRefsForMemsetPattern[Ptr].push_back(SI);
     } break;
     case LegalStoreKind::Memcpy:
+    case LegalStoreKind::UnorderedAtomicMemcpy:
       StoreRefsForMemcpy.push_back(SI);
       break;
     default:
@@ -882,7 +898,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 /// for (i) A[i] = B[i];
 bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                                                     const SCEV *BECount) {
-  assert(SI->isSimple() && "Expected only non-volatile stores.");
+  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
 
   Value *StorePtr = SI->getPointerOperand();
   const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
@@ -892,7 +908,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
 
   // The store must be feeding a non-volatile load.
   LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
-  assert(LI->isSimple() && "Expected only non-volatile stores.");
+  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
 
   // See if the pointer expression is an AddRec like {base,+,1} on the current
   // loop, which indicates a strided load.  If we have something else, it's a
@@ -966,16 +982,47 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
 
   const SCEV *NumBytesS =
       SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
-  if (StoreSize != 1)
-    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
-                               SCEV::FlagNUW);
 
-  Value *NumBytes =
-      Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
+  CallInst *NewCall = nullptr;
+  // Check whether to generate an unordered atomic memcpy:
+  //  If the load or store are atomic, then they must neccessarily be unordered
+  //  by previous checks.
+  if (!SI->isAtomic() && !LI->isAtomic()) {
+    if (StoreSize != 1)
+      NumBytesS = SE->getMulExpr(
+          NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW);
 
-  CallInst *NewCall =
-      Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
-                           std::min(SI->getAlignment(), LI->getAlignment()));
+    Value *NumBytes =
+        Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
+    NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
+  } else {
+    // We cannot allow unaligned ops for unordered load/store, so reject
+    // anything where the alignment isn't at least the element size.
+    if (Align < StoreSize)
+      return false;
+
+    // If the element.atomic memcpy is not lowered into explicit
+    // loads/stores later, then it will be lowered into an element-size
+    // specific lib call. If the lib call doesn't exist for our store size, then
+    // we shouldn't generate the memcpy.
+    if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize())
+      return false;
+
+    Value *NumElements =
+        Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
+
+    NewCall = Builder.CreateElementAtomicMemCpy(StoreBasePtr, LoadBasePtr,
+                                                NumElements, StoreSize);
+    // Propagate alignment info onto the pointer args. Note that unordered
+    // atomic loads/stores are *required* by the spec to have an alignment
+    // but non-atomic loads/stores may not.
+    NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(),
+                                                         SI->getAlignment()));
+    NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(),
+                                                         LI->getAlignment()));
+  }
   NewCall->setDebugLoc(SI->getDebugLoc());
 
   DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 32fd3da465fe..9b12ba180444 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -37,7 +37,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/LoopPredication.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -48,6 +47,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index fd15a9014def..fc0216e76a5b 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -11,10 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -31,6 +30,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 28d94497a3ef..b027278b24f2 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -131,7 +131,7 @@ static cl::opt<bool> EnablePhiElim(
 
 // The flag adds instruction count to solutions cost comparision.
 static cl::opt<bool> InsnsCost(
-  "lsr-insns-cost", cl::Hidden, cl::init(false),
+  "lsr-insns-cost", cl::Hidden, cl::init(true),
   cl::desc("Add instruction count to a LSR cost model"));
 
 // Flag to choose how to narrow complex lsr solution
@@ -950,39 +950,37 @@ namespace {
 
 /// This class is used to measure and compare candidate formulae.
 class Cost {
-  /// TODO: Some of these could be merged. Also, a lexical ordering
-  /// isn't always optimal.
-  unsigned Insns;
-  unsigned NumRegs;
-  unsigned AddRecCost;
-  unsigned NumIVMuls;
-  unsigned NumBaseAdds;
-  unsigned ImmCost;
-  unsigned SetupCost;
-  unsigned ScaleCost;
+  TargetTransformInfo::LSRCost C;
 
 public:
-  Cost()
-    : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0),
-      ImmCost(0), SetupCost(0), ScaleCost(0) {}
+  Cost() {
+    C.Insns = 0;
+    C.NumRegs = 0;
+    C.AddRecCost = 0;
+    C.NumIVMuls = 0;
+    C.NumBaseAdds = 0;
+    C.ImmCost = 0;
+    C.SetupCost = 0;
+    C.ScaleCost = 0;
+  }
 
-  bool operator<(const Cost &Other) const;
+  bool isLess(Cost &Other, const TargetTransformInfo &TTI);
 
   void Lose();
 
 #ifndef NDEBUG
   // Once any of the metrics loses, they must all remain losers.
   bool isValid() {
-    return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
-             | ImmCost | SetupCost | ScaleCost) != ~0u)
-      || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
-           & ImmCost & SetupCost & ScaleCost) == ~0u);
+    return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
+             | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
+      || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
+           & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
   }
 #endif
 
   bool isLoser() {
     assert(isValid() && "invalid cost");
-    return NumRegs == ~0u;
+    return C.NumRegs == ~0u;
   }
 
   void RateFormula(const TargetTransformInfo &TTI,
@@ -1170,10 +1168,10 @@ void Cost::RateRegister(const SCEV *Reg,
       }
 
       // Otherwise, it will be an invariant with respect to Loop L.
-      ++NumRegs;
+      ++C.NumRegs;
       return;
     }
-    AddRecCost += 1; /// TODO: This should be a function of the stride.
+    C.AddRecCost += 1; /// TODO: This should be a function of the stride.
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
@@ -1185,7 +1183,7 @@ void Cost::RateRegister(const SCEV *Reg,
       }
     }
   }
-  ++NumRegs;
+  ++C.NumRegs;
 
   // Rough heuristic; favor registers which don't require extra setup
   // instructions in the preheader.
@@ -1194,9 +1192,9 @@ void Cost::RateRegister(const SCEV *Reg,
       !(isa<SCEVAddRecExpr>(Reg) &&
         (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
-    ++SetupCost;
+    ++C.SetupCost;
 
-  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
+  C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
                SE.hasComputableLoopEvolution(Reg, L);
 }
 
@@ -1229,9 +1227,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
   // Tally up the registers.
-  unsigned PrevAddRecCost = AddRecCost;
-  unsigned PrevNumRegs = NumRegs;
-  unsigned PrevNumBaseAdds = NumBaseAdds;
+  unsigned PrevAddRecCost = C.AddRecCost;
+  unsigned PrevNumRegs = C.NumRegs;
+  unsigned PrevNumBaseAdds = C.NumBaseAdds;
   if (const SCEV *ScaledReg = F.ScaledReg) {
     if (VisitedRegs.count(ScaledReg)) {
       Lose();
@@ -1251,45 +1249,51 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
       return;
   }
 
-  // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
-  // additional instruction (at least fill).
-  unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
-  if (NumRegs > TTIRegNum) {
-    // Cost already exceeded TTIRegNum, then only newly added register can add
-    // new instructions.
-    if (PrevNumRegs > TTIRegNum)
-      Insns += (NumRegs - PrevNumRegs);
-    else
-      Insns += (NumRegs - TTIRegNum);
-  }
-
   // Determine how many (unfolded) adds we'll need inside the loop.
   size_t NumBaseParts = F.getNumRegs();
   if (NumBaseParts > 1)
     // Do not count the base and a possible second register if the target
     // allows to fold 2 registers.
-    NumBaseAdds +=
+    C.NumBaseAdds +=
         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
-  NumBaseAdds += (F.UnfoldedOffset != 0);
+  C.NumBaseAdds += (F.UnfoldedOffset != 0);
 
   // Accumulate non-free scaling amounts.
-  ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
+  C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
 
   // Tally up the non-zero immediates.
   for (const LSRFixup &Fixup : LU.Fixups) {
     int64_t O = Fixup.Offset;
     int64_t Offset = (uint64_t)O + F.BaseOffset;
     if (F.BaseGV)
-      ImmCost += 64; // Handle symbolic values conservatively.
+      C.ImmCost += 64; // Handle symbolic values conservatively.
                      // TODO: This should probably be the pointer size.
     else if (Offset != 0)
-      ImmCost += APInt(64, Offset, true).getMinSignedBits();
+      C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
 
     // Check with target if this offset with this instruction is
     // specifically not supported.
     if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
         !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
-      NumBaseAdds++;
+      C.NumBaseAdds++;
+  }
+
+  // If we don't count instruction cost, exit here.
+  if (!InsnsCost) {
+    assert(isValid() && "invalid cost");
+    return;
+  }
+
+  // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
+  // additional instruction (at least fill).
+  unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
+  if (C.NumRegs > TTIRegNum) {
+    // Cost already exceeded TTIRegNum, then only newly added register can add
+    // new instructions.
+    if (PrevNumRegs > TTIRegNum)
+      C.Insns += (C.NumRegs - PrevNumRegs);
+    else
+      C.Insns += (C.NumRegs - TTIRegNum);
   }
 
   // If ICmpZero formula ends with not 0, it could not be replaced by
@@ -1302,55 +1306,54 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
   // For {-10, +, 1}:
   // i = i + 1;
   if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
-    Insns++;
+    C.Insns++;
   // Each new AddRec adds 1 instruction to calculation.
-  Insns += (AddRecCost - PrevAddRecCost);
+  C.Insns += (C.AddRecCost - PrevAddRecCost);
 
   // BaseAdds adds instructions for unfolded registers.
   if (LU.Kind != LSRUse::ICmpZero)
-    Insns += NumBaseAdds - PrevNumBaseAdds;
+    C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
   assert(isValid() && "invalid cost");
 }
 
 /// Set this cost to a losing value.
 void Cost::Lose() {
-  Insns = ~0u;
-  NumRegs = ~0u;
-  AddRecCost = ~0u;
-  NumIVMuls = ~0u;
-  NumBaseAdds = ~0u;
-  ImmCost = ~0u;
-  SetupCost = ~0u;
-  ScaleCost = ~0u;
+  C.Insns = ~0u;
+  C.NumRegs = ~0u;
+  C.AddRecCost = ~0u;
+  C.NumIVMuls = ~0u;
+  C.NumBaseAdds = ~0u;
+  C.ImmCost = ~0u;
+  C.SetupCost = ~0u;
+  C.ScaleCost = ~0u;
 }
 
 /// Choose the lower cost.
-bool Cost::operator<(const Cost &Other) const {
-  if (InsnsCost && Insns != Other.Insns)
-    return Insns < Other.Insns;
-  return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
-                  ImmCost, SetupCost) <
-         std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
-                  Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
-                  Other.SetupCost);
+bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
+  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
+      C.Insns != Other.C.Insns)
+    return C.Insns < Other.C.Insns;
+  return TTI.isLSRCostLess(C, Other.C);
 }
 
 void Cost::print(raw_ostream &OS) const {
-  OS << Insns << " instruction" << (Insns == 1 ? " " : "s ");
-  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
-  if (AddRecCost != 0)
-    OS << ", with addrec cost " << AddRecCost;
-  if (NumIVMuls != 0)
-    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
-  if (NumBaseAdds != 0)
-    OS << ", plus " << NumBaseAdds << " base add"
-       << (NumBaseAdds == 1 ? "" : "s");
-  if (ScaleCost != 0)
-    OS << ", plus " << ScaleCost << " scale cost";
-  if (ImmCost != 0)
-    OS << ", plus " << ImmCost << " imm cost";
-  if (SetupCost != 0)
-    OS << ", plus " << SetupCost << " setup cost";
+  if (InsnsCost)
+    OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
+  OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
+  if (C.AddRecCost != 0)
+    OS << ", with addrec cost " << C.AddRecCost;
+  if (C.NumIVMuls != 0)
+    OS << ", plus " << C.NumIVMuls << " IV mul"
+       << (C.NumIVMuls == 1 ? "" : "s");
+  if (C.NumBaseAdds != 0)
+    OS << ", plus " << C.NumBaseAdds << " base add"
+       << (C.NumBaseAdds == 1 ? "" : "s");
+  if (C.ScaleCost != 0)
+    OS << ", plus " << C.ScaleCost << " scale cost";
+  if (C.ImmCost != 0)
+    OS << ", plus " << C.ImmCost << " imm cost";
+  if (C.SetupCost != 0)
+    OS << ", plus " << C.SetupCost << " setup cost";
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -4105,7 +4108,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
         Cost CostBest;
         Regs.clear();
         CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
-        if (CostF < CostBest)
+        if (CostF.isLess(CostBest, TTI))
           std::swap(F, Best);
         DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
               dbgs() << "\n"
@@ -4573,7 +4576,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
     NewCost = CurCost;
     NewRegs = CurRegs;
     NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
-    if (NewCost < SolutionCost) {
+    if (NewCost.isLess(SolutionCost, TTI)) {
       Workspace.push_back(&F);
       if (Workspace.size() != Uses.size()) {
         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 19daebd0613a..d0c96fa627a4 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -26,34 +26,34 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Support/BranchProbability.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 7d8da9b453f9..46f8a3564265 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -93,7 +93,9 @@ static bool handleSwitchExpect(SwitchInst &SI) {
 /// the branch probability info for the originating branch can be inferred.
 static void handlePhiDef(CallInst *Expect) {
   Value &Arg = *Expect->getArgOperand(0);
-  ConstantInt *ExpectedValue = cast<ConstantInt>(Expect->getArgOperand(1));
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(Expect->getArgOperand(1));
+  if (!ExpectedValue)
+    return;
   const APInt &ExpectedPhiValue = ExpectedValue->getValue();
 
   // Walk up in backward a list of instructions that
diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 4f413715ffe6..070114a84cc5 100644
--- a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -17,10 +17,10 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 21a632073da7..7896396f0898 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,11 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -31,12 +32,12 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
@@ -49,7 +50,6 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 27809f5b6f66..6926aae37963 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -378,6 +378,15 @@ private:
 };
 
 namespace llvm {
+struct ExactEqualsExpression {
+  const Expression &E;
+  explicit ExactEqualsExpression(const Expression &E) : E(E) {}
+  hash_code getComputedHash() const { return E.getComputedHash(); }
+  bool operator==(const Expression &Other) const {
+    return E.exactlyEquals(Other);
+  }
+};
+
 template <> struct DenseMapInfo<const Expression *> {
   static const Expression *getEmptyKey() {
     auto Val = static_cast<uintptr_t>(-1);
@@ -390,8 +399,17 @@ template <> struct DenseMapInfo<const Expression *> {
     return reinterpret_cast<const Expression *>(Val);
   }
   static unsigned getHashValue(const Expression *E) {
-    return static_cast<unsigned>(E->getComputedHash());
+    return E->getComputedHash();
   }
+  static unsigned getHashValue(const ExactEqualsExpression &E) {
+    return E.getComputedHash();
+  }
+  static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) {
+    if (RHS == getTombstoneKey() || RHS == getEmptyKey())
+      return false;
+    return LHS == *RHS;
+  }
+
   static bool isEqual(const Expression *LHS, const Expression *RHS) {
     if (LHS == RHS)
       return true;
@@ -848,6 +866,8 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
     // Things in TOPClass are equivalent to everything.
     if (ValueToClass.lookup(*U) == TOPClass)
       return false;
+    if (lookupOperandLeader(*U) == PN)
+      return false;
     return true;
   });
   std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
@@ -1571,30 +1591,6 @@ bool NewGVN::isCycleFree(const Instruction *I) const {
 
 // Evaluate PHI nodes symbolically, and create an expression result.
 const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
-  // Resolve irreducible and reducible phi cycles.
-  // FIXME: This is hopefully a temporary solution while we resolve the issues
-  // with fixpointing self-cycles.  It currently should be "guaranteed" to be
-  // correct, but non-optimal.  The SCCFinder does not, for example, take
-  // reachability of arguments into account, etc.
-  SCCFinder.Start(I);
-  bool CanOptimize = true;
-  SmallPtrSet<Value *, 8> OuterOps;
-
-  auto &Component = SCCFinder.getComponentFor(I);
-  for (auto *Member : Component) {
-    if (!isa<PHINode>(Member)) {
-      CanOptimize = false;
-      break;
-    }
-    for (auto &PHIOp : cast<PHINode>(Member)->operands())
-      if (!isa<PHINode>(PHIOp) || !Component.count(cast<PHINode>(PHIOp)))
-        OuterOps.insert(PHIOp);
-  }
-  if (CanOptimize && OuterOps.size() == 1) {
-    DEBUG(dbgs() << "Resolving cyclic phi to value " << *(*OuterOps.begin())
-                 << "\n");
-    return createVariableOrConstant(*OuterOps.begin());
-  }
   // True if one of the incoming phi edges is a backedge.
   bool HasBackedge = false;
   // All constant tracks the state of whether all the *original* phi operands
@@ -1662,7 +1658,12 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const {
         if (!someEquivalentDominates(AllSameInst, I))
           return E;
     }
-
+    // Can't simplify to something that comes later in the iteration.
+    // Otherwise, when and if it changes congruence class, we will never catch
+    // up. We will always be a class behind it.
+    if (isa<Instruction>(AllSameValue) &&
+        InstrToDFSNum(AllSameValue) > InstrToDFSNum(I))
+      return E;
     NumGVNPhisAllSame++;
     DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
                  << "\n");
@@ -2158,7 +2159,17 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
     if (OldClass->getDefiningExpr()) {
       DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr()
                    << " from table\n");
-      ExpressionToClass.erase(OldClass->getDefiningExpr());
+      // We erase it as an exact expression to make sure we don't just erase an
+      // equivalent one.
+      auto Iter = ExpressionToClass.find_as(
+          ExactEqualsExpression(*OldClass->getDefiningExpr()));
+      if (Iter != ExpressionToClass.end())
+        ExpressionToClass.erase(Iter);
+#ifdef EXPENSIVE_CHECKS
+      assert(
+          (*OldClass->getDefiningExpr() != *E || ExpressionToClass.lookup(E)) &&
+          "We erased the expression we just inserted, which should not happen");
+#endif
     }
   } else if (OldClass->getLeader() == I) {
     // When the leader changes, the value numbering of
@@ -2272,8 +2283,13 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
     auto *OldE = ValueToExpression.lookup(I);
     // It could just be that the old class died. We don't want to erase it if we
     // just moved classes.
-    if (OldE && isa<StoreExpression>(OldE) && *E != *OldE)
-      ExpressionToClass.erase(OldE);
+    if (OldE && isa<StoreExpression>(OldE) && *E != *OldE) {
+      // Erase this as an exact expression to ensure we don't erase expressions
+      // equivalent to it.
+      auto Iter = ExpressionToClass.find_as(ExactEqualsExpression(*OldE));
+      if (Iter != ExpressionToClass.end())
+        ExpressionToClass.erase(Iter);
+    }
   }
   ValueToExpression[I] = E;
 }
@@ -3060,6 +3076,9 @@ void NewGVN::iterateTouchedInstructions() {
         }
         updateProcessedCount(CurrBlock);
       }
+      // Reset after processing (because we may mark ourselves as touched when
+      // we propagate equalities).
+      TouchedInstructions.reset(InstrNum);
 
       if (auto *MP = dyn_cast<MemoryPhi>(V)) {
         DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
@@ -3070,9 +3089,6 @@ void NewGVN::iterateTouchedInstructions() {
         llvm_unreachable("Should have been a MemoryPhi or Instruction");
       }
       updateProcessedCount(V);
-      // Reset after processing (because we may mark ourselves as touched when
-      // we propagate equalities).
-      TouchedInstructions.reset(InstrNum);
     }
   }
   NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 615029dd161b..96295683314c 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,7 +16,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -25,6 +24,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <list>
 using namespace llvm;
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 350b50ffcdd4..bae7911d222c 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -12,15 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Pass.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/MapVector.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Dominators.h"
@@ -28,15 +27,16 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/Verifier.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 1d0e8396f6a2..815492ac354c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1117,7 +1117,7 @@ CallOverdefined:
     // Otherwise, if we have a single return value case, and if the function is
     // a declaration, maybe we can constant fold it.
     if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
-        canConstantFoldCallTo(F)) {
+        canConstantFoldCallTo(CS, F)) {
 
       SmallVector<Constant*, 8> Operands;
       for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
@@ -1137,7 +1137,7 @@ CallOverdefined:
 
       // If we can constant fold this, mark the result of the call as a
       // constant.
-      if (Constant *C = ConstantFoldCall(F, Operands, TLI)) {
+      if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) {
         // call -> undef.
         if (isa<UndefValue>(C))
           return;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index fb1b5813fd79..1527f15f18a3 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3626,10 +3626,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
         auto *PartPtrTy =
             PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
 
+        auto AS = SI->getPointerAddressSpace();
         StoreInst *PStore = IRB.CreateAlignedStore(
-            PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
-                                  APInt(DL.getPointerSizeInBits(), PartOffset),
-                                  PartPtrTy, StoreBasePtr->getName() + "."),
+            PLoad,
+            getAdjustedPtr(IRB, DL, StoreBasePtr,
+                           APInt(DL.getPointerSizeInBits(AS), PartOffset),
+                           PartPtrTy, StoreBasePtr->getName() + "."),
             getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
         PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
         DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
@@ -3707,9 +3709,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
         PLoad = (*SplitLoads)[Idx];
       } else {
         IRB.SetInsertPoint(LI);
+        auto AS = LI->getPointerAddressSpace();
         PLoad = IRB.CreateAlignedLoad(
             getAdjustedPtr(IRB, DL, LoadBasePtr,
-                           APInt(DL.getPointerSizeInBits(), PartOffset),
+                           APInt(DL.getPointerSizeInBits(AS), PartOffset),
                            LoadPartPtrTy, LoadBasePtr->getName() + "."),
             getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
             LI->getName());
@@ -3717,10 +3720,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
 
       // And store this partition.
       IRB.SetInsertPoint(SI);
+      auto AS = SI->getPointerAddressSpace();
       StoreInst *PStore = IRB.CreateAlignedStore(
-          PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
-                                APInt(DL.getPointerSizeInBits(), PartOffset),
-                                StorePartPtrTy, StoreBasePtr->getName() + "."),
+          PLoad,
+          getAdjustedPtr(IRB, DL, StoreBasePtr,
+                         APInt(DL.getPointerSizeInBits(AS), PartOffset),
+                         StorePartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
 
       // Now build a new slice for the alloca.
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 9fa43da99da9..850a01114eeb 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -20,12 +20,12 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index c0c09a7e43fe..d11855f2f3a9 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,12 +14,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index cde659b9d189..84675f41cdd5 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -156,27 +156,27 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 0f170e26ce5f..aaab5857e0f1 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -7,13 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
@@ -37,7 +38,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include <algorithm>
 #include <cassert>
 #include <iterator>
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 102e9eaeab77..5210f165b874 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -114,7 +114,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo,
   if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
     // We cannot sink a load across a critical edge - there may be stores in
     // other code paths.
-    if (!isSafeToSpeculativelyExecute(Inst))
+    if (isa<LoadInst>(Inst))
       return false;
 
     // We don't want to sink across a critical edge if we don't dominate the
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 49ce0262c97b..486f3e5a43d4 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -7,7 +7,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SCCIterator.h"
@@ -20,6 +19,7 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index bf54a51c7635..3e5993618c4c 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -51,13 +51,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
@@ -76,6 +75,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 7a21c03da221..83bc05d0311c 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_library(LLVMTransformUtils
   MetaRenamer.cpp
   ModuleUtils.cpp
   NameAnonGlobals.cpp
+  OrderedInstructions.cpp
   PredicateInfo.cpp
   PromoteMemoryToRegister.cpp
   StripGCRelocates.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1c1a75c111e9..314c990293cc 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
@@ -31,6 +30,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <map>
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 5444b752de82..d27cb45c7d7f 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm-c/Core.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm-c/Core.h"
 using namespace llvm;
 
 static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 0eee6e19efac..6d3d287defdb 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index 59f176e2f231..c97e544e620a 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -20,8 +20,8 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -439,7 +439,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
 
       if (Callee->isDeclaration()) {
         // If this is a function we can constant fold, do it.
-        if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
+        if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) {
           InstResult = C;
           DEBUG(dbgs() << "Constant folded function call. Result: " <<
                 *InstResult << "\n");
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 7b96fbb11a14..435eff3bef47 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -19,6 +18,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "flattencfg"
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index 57468be9a2a8..0457294361b5 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -15,8 +15,8 @@
 #include "llvm/Transforms/Utils/FunctionComparator.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/CallSite.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index b00f4b14068a..a98d07237b47 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 using namespace llvm;
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index ba4b78ac758a..245fefb38ee8 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
@@ -18,7 +19,6 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 0ca9f4c484e6..2a18c140c788 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -28,13 +27,13 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
@@ -43,6 +42,7 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 53b432fcafd4..23ec45edb3ef 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -14,10 +14,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index ebd528bc8ec1..2af671636cbd 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -22,8 +22,8 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 72c06aef8037..f3db278ef1e4 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -38,15 +38,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/LoopSimplify.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -65,6 +64,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 4ab4d7949d23..f2527f89e83e 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,7 +16,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -39,6 +38,7 @@
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "loop-unroll"
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 391fde3b0b01..a920cd86a26a 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -21,7 +21,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopIterator.h"
@@ -37,6 +36,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <algorithm>
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 81f033e7d51a..412f6129407e 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -15,14 +15,13 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index c7cb561b5e21..0a51f9a0e4a2 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 8959e77438e9..890afbc46e63 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
@@ -24,6 +23,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 #include <algorithm>
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 481c6aa29c3a..9f2ad540c83d 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -23,6 +22,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/TypeFinder.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Transforms/Utils/OrderedInstructions.cpp b/lib/Transforms/Utils/OrderedInstructions.cpp
new file mode 100644
index 000000000000..2e67e0def5b9
--- /dev/null
+++ b/lib/Transforms/Utils/OrderedInstructions.cpp
@@ -0,0 +1,33 @@
+//===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a utility to check dominance between 2 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
+using namespace llvm;
+
+/// Given 2 instructions, use OrderedBasicBlock to check for dominance relation
+/// if the instructions are in the same basic block. Otherwise, query the
+/// dominator tree with the instructions' parent blocks.
+bool OrderedInstructions::dominates(const Instruction *InstA,
+                                    const Instruction *InstB) const {
+  const BasicBlock *IBB = InstA->getParent();
+  // If both instructions share a basic block, order them with that block's
+  // OrderedBasicBlock, which is created and cached in OBBMap on first use.
+  if (IBB == InstB->getParent()) {
+    auto OBB = OBBMap.find(IBB);
+    if (OBB == OBBMap.end())
+      OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
+    return OBB->second->dominates(InstA, InstB);
+  } else {
+    return DT->dominates(InstA->getParent(), InstB->getParent());
+  }
+}
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 8b6a2c3766d2..6ccf54e49dd3 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -11,9 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -30,7 +31,6 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
 #include <cassert>
 #include <utility>
diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp
index 9afd175c10ed..8c23957ac43e 100644
--- a/lib/Transforms/Utils/SanitizerStats.cpp
+++ b/lib/Transforms/Utils/SanitizerStats.cpp
@@ -12,13 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/SanitizerStats.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 1b442a9a264d..0970c436e665 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -15,13 +15,13 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
@@ -29,8 +29,8 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
@@ -55,7 +55,6 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
-#include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 2509b5f22046..2ea15f65cef9 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -27,8 +27,8 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "instsimplify"
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index cc6c47e8f978..b723b65f35e5 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -738,8 +738,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
   ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
   if (!LenC)
     return nullptr;
-  uint64_t Len = LenC->getZExtValue();
 
+  uint64_t Len = LenC->getZExtValue();
   if (Len == 0) // memcmp(s1,s2,0) -> 0
     return Constant::getNullValue(CI->getType());
 
diff --git a/lib/Transforms/Utils/StripGCRelocates.cpp b/lib/Transforms/Utils/StripGCRelocates.cpp
index f3d3fadb51e9..49dc15cf5e7c 100644
--- a/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -20,8 +20,8 @@
 #include "llvm/IR/Statepoint.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 66dbf335cb95..cd0378e0140c 100644
--- a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index 6d136636ce70..20107553665f 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -59,9 +59,9 @@
 
 #define DEBUG_TYPE "symbol-rewriter"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MemoryBuffer.h"
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 7106483c3bd2..f6c7d1c4989e 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/InitializePasses.h"
 #include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassRegistry.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index c83b3f7b225b..78453aaa16ce 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -15,7 +15,6 @@
 //===----------------------------------------------------------------------===//
 
 #define BBV_NAME "bb-vectorize"
-#include "llvm/Transforms/Vectorize.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
@@ -50,6 +49,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Vectorize.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 799eef21dc4e..1abdb2484850 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -485,8 +485,7 @@ protected:
   /// of scalars. If \p IfPredicateInstr is true we need to 'hide' each
   /// scalarized instruction behind an if block predicated on the control
   /// dependence of the instruction.
-  virtual void scalarizeInstruction(Instruction *Instr,
-                                    bool IfPredicateInstr = false);
+  void scalarizeInstruction(Instruction *Instr, bool IfPredicateInstr = false);
 
   /// Vectorize Load and Store instructions,
   virtual void vectorizeMemoryInstruction(Instruction *Instr);
@@ -3815,7 +3814,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
       if (auto *BO = dyn_cast<BinaryOperator>(I)) {
         NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)),
                              ShrinkOperand(BO->getOperand(1)));
-        cast<BinaryOperator>(NewI)->copyIRFlags(I);
+
+        // Any wrapping introduced by shrinking this operation shouldn't be
+        // considered undefined behavior. So, we can't unconditionally copy
+        // arithmetic wrapping flags to NewI.
+        cast<BinaryOperator>(NewI)->copyIRFlags(I, /*IncludeWrapFlags=*/false);
       } else if (auto *CI = dyn_cast<ICmpInst>(I)) {
         NewI =
             B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)),
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 28e0b2eb9866..a21928317888 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -17,9 +17,9 @@
 #include "llvm-c/Initialization.h"
 #include "llvm-c/Transforms/Vectorize.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/IR/LegacyPassManager.h"
 
 using namespace llvm;
 
diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp
index 431c251feb65..d9ce255bc688 100644
--- a/lib/XRay/InstrumentationMap.cpp
+++ b/lib/XRay/InstrumentationMap.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/XRay/InstrumentationMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -22,7 +23,6 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/YAMLTraits.h"
-#include "llvm/XRay/InstrumentationMap.h"
 #include <algorithm>
 #include <cstddef>
 #include <cstdint>
diff --git a/test/Analysis/BranchProbabilityInfo/libfunc_call.ll b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll
new file mode 100644
index 000000000000..13bc0de90a61
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll
@@ -0,0 +1,264 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -analyze -lazy-branch-prob | FileCheck %s
+; RUN: opt < %s -passes='print<branch-prob>' -disable-output 2>&1 | FileCheck %s
+
+declare i32 @strcmp(i8*, i8*)
+declare i32 @strncmp(i8*, i8*, i32)
+declare i32 @strcasecmp(i8*, i8*)
+declare i32 @strncasecmp(i8*, i8*, i32)
+declare i32 @memcmp(i8*, i8*)
+declare i32 @nonstrcmp(i8*, i8*)
+
+
+; Check that the result of strcmp is considered more likely to be nonzero than
+; zero, and equally likely to be (nonzero) positive or negative.
+
+define i32 @test_strcmp_eq(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_eq'
+entry:
+  %val = call i32 @strcmp(i8* %p, i8* %q)
+  %cond = icmp eq i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50%
+; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_strcmp_ne(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_ne'
+entry:
+  %val = call i32 @strcmp(i8* %p, i8* %q)
+  %cond = icmp ne i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_strcmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_sgt'
+entry:
+  %val = call i32 @strcmp(i8* %p, i8* %q)
+  %cond = icmp sgt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_strcmp_slt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcmp_slt'
+entry:
+  %val = call i32 @strcmp(i8* %p, i8* %q)
+  %cond = icmp slt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+
+; Similarly check other library functions that have the same behaviour
+
+define i32 @test_strncmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strncmp_sgt'
+entry:
+  %val = call i32 @strncmp(i8* %p, i8* %q, i32 4)
+  %cond = icmp sgt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_strcasecmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strcasecmp_sgt'
+entry:
+  %val = call i32 @strcasecmp(i8* %p, i8* %q)
+  %cond = icmp sgt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_strncasecmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_strncasecmp_sgt'
+entry:
+  %val = call i32 @strncasecmp(i8* %p, i8* %q, i32 4)
+  %cond = icmp sgt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_memcmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_memcmp_sgt'
+entry:
+  %val = call i32 @memcmp(i8* %p, i8* %q)
+  %cond = icmp sgt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+
+; Check that for the result of a call to a non-library function the default
+; heuristic is applied, i.e. positive more likely than negative, nonzero more
+; likely than zero.
+
+define i32 @test_nonstrcmp_eq(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_eq'
+entry:
+  %val = call i32 @nonstrcmp(i8* %p, i8* %q)
+  %cond = icmp eq i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50%
+; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_nonstrcmp_ne(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_ne'
+entry:
+  %val = call i32 @nonstrcmp(i8* %p, i8* %q)
+  %cond = icmp ne i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
+
+define i32 @test_nonstrcmp_sgt(i8* %p, i8* %q) {
+; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_sgt'
+entry:
+  %val = call i32 @nonstrcmp(i8* %p, i8* %q)
+  %cond = icmp sgt i32 %val, 0
+  br i1 %cond, label %then, label %else
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
+
+then:
+  br label %exit
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+else:
+  br label %exit
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
+exit:
+  %result = phi i32 [ 0, %then ], [ 1, %else ]
+  ret i32 %result
+}
diff --git a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll
index 50ad61a8f100..16bc8a983e48 100644
--- a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll
+++ b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll
@@ -44,7 +44,7 @@ entry:
   %9 = add i32 %f.promoted, %smax
   %10 = add i32 %9, 2
   call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i32 1, i1 false)
-; CHECK:  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i64 5, i64 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false)
+; CHECK:  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false)
 ; CHECK-NOT: call void @llvm.memset.p0i8.i32(i8* getelementptr ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i64 1, i64 4, i64 4, i32 1)
   ret i32 0
 }
diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
index 00ab21e46d5d..3a0ab0f03b99 100644
--- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
+++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll
@@ -10,17 +10,23 @@
 define i8 @test1(i32 %a, i32 %length) {
 ; CHECK-LABEL: LVI for function 'test1':
 entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT:    ; LatticeVal for: 'i32 %a' is: overdefined
+; CHECK-NEXT:    ; LatticeVal for: 'i32 %length' is: overdefined
   br label %loop
-; CHECK-LABEL: backedge:
-; CHECK-NEXT: ; CachedLatticeValues for: '  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]'
-; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, 400>'
-; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
-; CHECK-NEXT: ; CachedLatticeValues for: '  %iv.next = add nsw i32 %iv, 1'
-; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, 401>'
-; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1
-; CHECK-NEXT:  %cont = icmp slt i32 %iv.next, 400
-; CHECK-NEXT: br i1 %cont, label %backedge, label %exit
 
+; CHECK-LABEL: backedge:
+; CHECK-NEXT:     ; LatticeVal for: 'i32 %a' is: overdefined
+; CHECK-NEXT:     ; LatticeVal for: 'i32 %length' is: overdefined
+; CHECK-NEXT:     ; LatticeVal for: '  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, 400>
+; CHECK-NEXT:     ; LatticeVal for: '  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<399, 400>
+; CHECK-NEXT:  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+; CHECK-NEXT:     ; LatticeVal for: '  %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, 401>
+; CHECK-NEXT:     ; LatticeVal for: '  %iv.next = add nsw i32 %iv, 1' in BB: '%exit' is: constantrange<400, 401>
+; CHECK-NEXT:  %iv.next = add nsw i32 %iv, 1
+; CHECK-NEXT:     ; LatticeVal for: '  %cont = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined
+; CHECK-NEXT:     ; LatticeVal for: '  %cont = icmp slt i32 %iv.next, 400' in BB: '%exit' is: constantrange<0, -1>
+; CHECK-NEXT:  %cont = icmp slt i32 %iv.next, 400
 ; CHECK-NOT: loop
 loop:
   %iv = phi i32 [0, %entry], [%iv.next, %backedge]
@@ -36,46 +42,58 @@ exit:
   ret i8 0
 }
 
-
 ; Here JT does not transform the code, but LVICache is populated during the processing of blocks.
 define i8 @test2(i32 %n) {
 ; CHECK-LABEL: LVI for function 'test2':
 ; CHECK-LABEL: entry:
-; CHECK-LABEL: ; OverDefined values for block are:
-; CHECK-NEXT: ;i32 %n
+; CHECK-NEXT:    ; LatticeVal for: 'i32 %n' is: overdefined
 ; CHECK-NEXT: br label %loop
 entry:
   br label %loop
 
 ; CHECK-LABEL: loop:
-; CHECK-LABEL: ; OverDefined values for block are:
-; CHECK-NEXT: ; %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]
-; CHECK-NEXT: ; CachedLatticeValues for: '  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]'
-; CHECK-DAG: ; at beginning of BasicBlock: '%loop' LatticeVal: 'constantrange<0, -2147483647>'
-; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, -2147483648>'
-; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
-; CHECK: %cnd = and i1 %cnd1, %cnd2
-; CHECK: br i1 %cnd, label %backedge, label %exit
+; CHECK-NEXT:    ; LatticeVal for: 'i32 %n' is: overdefined
+; CHECK-NEXT:    ; LatticeVal for: '  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%loop' is: constantrange<0, -2147483647>
+; CHECK-DAG:     ; LatticeVal for: '  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, -2147483648>
+; CHECK-DAG:     ; LatticeVal for: '  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<0, -2147483647>
+; CHECK-NEXT:  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
 loop:
   %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+; CHECK-NEXT:    ; LatticeVal for: '  %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%loop' is: overdefined
+; CHECK-DAG:     ; LatticeVal for: '  %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%backedge' is: constantrange<1, -2147483648>
+; CHECK-DAG:     ; LatticeVal for: '  %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%exit' is: overdefined
+; CHECK-NEXT:  %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]
   %iv2 = phi i32 [%n, %entry], [%iv2.next, %backedge]
+
+; CHECK-NEXT:    ; LatticeVal for: '  %cnd1 = icmp sge i32 %iv, 0' in BB: '%loop' is: overdefined
+; CHECK-DAG:     ; LatticeVal for: '  %cnd1 = icmp sge i32 %iv, 0' in BB: '%backedge' is: overdefined
+; CHECK-DAG:     ; LatticeVal for: '  %cnd1 = icmp sge i32 %iv, 0' in BB: '%exit' is: overdefined
+; CHECK-NEXT:  %cnd1 = icmp sge i32 %iv, 0
   %cnd1 = icmp sge i32 %iv, 0
   %cnd2 = icmp sgt i32 %iv2, 0
+; CHECK:       %cnd2 = icmp sgt i32 %iv2, 0
+; CHECK:         ; LatticeVal for: '  %cnd = and i1 %cnd1, %cnd2' in BB: '%loop' is: overdefined
+; CHECK-DAG:     ; LatticeVal for: '  %cnd = and i1 %cnd1, %cnd2' in BB: '%backedge' is: constantrange<-1, 0>
+; CHECK-DAG:     ; LatticeVal for: '  %cnd = and i1 %cnd1, %cnd2' in BB: '%exit' is: overdefined
+; CHECK-NEXT:  %cnd = and i1 %cnd1, %cnd2
   %cnd = and i1 %cnd1, %cnd2
   br i1 %cnd, label %backedge, label %exit
 
 ; CHECK-LABEL: backedge:
-; CHECK-NEXT: ; CachedLatticeValues for: '  %iv.next = add nsw i32 %iv, 1'
-; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, -2147483647>'
-; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1
-; CHECK-NEXT: %iv2.next = sub nsw i32 %iv2, 1
-; CHECK: %cont = and i1 %cont1, %cont2
-; CHECK: br i1 %cont, label %loop, label %exit
+; CHECK-NEXT:    ; LatticeVal for: 'i32 %n' is: overdefined
+; CHECK-NEXT:    ; LatticeVal for: '  %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, -2147483647>
+; CHECK-NEXT:  %iv.next = add nsw i32 %iv, 1
 backedge:
   %iv.next = add nsw i32 %iv, 1
   %iv2.next = sub nsw i32 %iv2, 1
+; CHECK:         ; LatticeVal for: '  %cont1 = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined
+; CHECK-NEXT:  %cont1 = icmp slt i32 %iv.next, 400
   %cont1 = icmp slt i32 %iv.next, 400
+; CHECK-NEXT:    ; LatticeVal for: '  %cont2 = icmp sgt i32 %iv2.next, 0' in BB: '%backedge' is: overdefined
+; CHECK-NEXT:  %cont2 = icmp sgt i32 %iv2.next, 0
   %cont2 = icmp sgt i32 %iv2.next, 0
+; CHECK-NEXT:    ; LatticeVal for: '  %cont = and i1 %cont1, %cont2' in BB: '%backedge' is: overdefined
+; CHECK-NEXT:  %cont = and i1 %cont1, %cont2
   %cont = and i1 %cont1, %cont2
   br i1 %cont, label %loop, label %exit
 
diff --git a/test/Bindings/OCaml/core.ml b/test/Bindings/OCaml/core.ml
index 105f1bc4f732..802baa0b80b2 100644
--- a/test/Bindings/OCaml/core.ml
+++ b/test/Bindings/OCaml/core.ml
@@ -66,6 +66,16 @@ let suite name f =
 let filename = Sys.argv.(1)
 let m = create_module context filename
 
+(*===-- Contained types  --------------------------------------------------===*)
+
+let test_contained_types () =
+  let pointer_i32 = pointer_type i32_type in
+  insist (i32_type = (Array.get (subtypes pointer_i32) 0));
+
+  let ar = struct_type context [| i32_type; i8_type |] in
+  insist (i32_type = (Array.get (subtypes ar)) 0);
+  insist (i8_type = (Array.get (subtypes ar)) 1)
+
 
 (*===-- Conversion --------------------------------------------------------===*)
 
@@ -1533,6 +1543,7 @@ let test_writer () =
 (*===-- Driver ------------------------------------------------------------===*)
 
 let _ =
+  suite "contained types"  test_contained_types;
   suite "conversion"       test_conversion;
   suite "target"           test_target;
   suite "constants"        test_constants;
diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll
index 53ffef900b57..184f72e9856e 100644
--- a/test/Bitcode/ptest-old.ll
+++ b/test/Bitcode/ptest-old.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 ; RUN: verify-uselistorder < %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 define i32 @foo(<4 x float> %bar) nounwind {
 entry:
diff --git a/test/BugPoint/unsymbolized.ll b/test/BugPoint/unsymbolized.ll
new file mode 100644
index 000000000000..8547f220ea26
--- /dev/null
+++ b/test/BugPoint/unsymbolized.ll
@@ -0,0 +1,21 @@
+; REQUIRES: loadable_module
+; RUN: echo "import sys" > %t.py
+; RUN: echo "print('args = ' + str(sys.argv))" >> %t.py
+; RUN: echo "exit(1)" >> %t.py
+; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command="%python" -opt-args %t.py | FileCheck %s
+; RUN: not --crash opt -load %llvmshlibdir/BugpointPasses%shlibext %s -bugpoint-crashcalls -disable-symbolication 2>&1 | FileCheck --check-prefix=CRASH %s
+
+; Test that bugpoint disables symbolication on the opt tool to reduce runtime overhead when opt crashes
+; CHECK: args = {{.*}}'-disable-symbolication'
+
+; Test that opt, when it crashes & is passed -disable-symbolication, doesn't symbolicate.
+; In theory this test should maybe be in test/tools/opt or
+; test/Transforms, but since there doesn't seem to be another convenient way to
+; crash opt, apart from the BugpointPasses dynamic plugin, this is the spot for
+; now.
+; CRASH-NOT: Signals.inc
+
+define void @f() {
+  call void @f()
+  ret void
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 25c340fea6f7..b52b6018e026 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -61,7 +61,7 @@ set(LLVM_TEST_DEPENDS
           llvm-nm
           llvm-objdump
           llvm-opt-report
-          llvm-pdbdump
+          llvm-pdbutil
           llvm-profdata
           llvm-ranlib
           llvm-readobj
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
index 006308641184..cd3ea9715e0f 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll
@@ -4,7 +4,7 @@
 ; CHECK: name: test_stack_guard
 
 ; CHECK: stack:
-; CHECK:  - { id: 0, name: StackGuardSlot, offset: 0, size: 8, alignment: 8 }
+; CHECK:  - { id: 0, name: StackGuardSlot,  type: default, offset: 0, size: 8, alignment: 8,
 ; CHECK-NOT: id: 1
 
 ; CHECK: [[GUARD_SLOT:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.StackGuardSlot
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index ac3d4b17f739..65b8ba570701 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -31,10 +31,13 @@ define i64 @muli64(i64 %arg1, i64 %arg2) {
 ; Tests for alloca
 ; CHECK-LABEL: name: allocai64
 ; CHECK: stack:
-; CHECK-NEXT:   - { id: 0, name: ptr1, offset: 0, size: 8, alignment: 8 }
-; CHECK-NEXT:   - { id: 1, name: ptr2, offset: 0, size: 8, alignment: 1 }
-; CHECK-NEXT:   - { id: 2, name: ptr3, offset: 0, size: 128, alignment: 8 }
-; CHECK-NEXT:   - { id: 3, name: ptr4, offset: 0, size: 1, alignment: 8 }
+; CHECK-NEXT:   - { id: 0, name: ptr1, type: default, offset: 0, size: 8, alignment: 8,
+; CHECK-NEXT:       callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+; CHECK-NEXT:   - { id: 1, name: ptr2, type: default, offset: 0, size: 8, alignment: 1,
+; CHECK-NEXT:       callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+; CHECK-NEXT:   - { id: 2, name: ptr3, type: default, offset: 0, size: 128, alignment: 8,
+; CHECK-NEXT:       callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+; CHECK-NEXT:   - { id: 3, name: ptr4, type: default, offset: 0, size: 1, alignment: 8,
 ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.0.ptr1
 ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.1.ptr2
 ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.2.ptr3
@@ -1550,3 +1553,15 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2)
 define <4 x half> @test_constant_vector() {
   ret <4 x half> <half undef, half undef, half undef, half 0xH3C00>
 }
+
+define i32 @test_target_mem_intrinsic(i32* %addr) {
+; CHECK-LABEL: name: test_target_mem_intrinsic
+; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0
+; CHECK: [[VAL:%[0-9]+]](s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr)
+; CHECK: G_TRUNC [[VAL]](s64)
+  %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr)
+  %trunc = trunc i64 %val to i32
+  ret i32 %trunc
+}
+
+declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind
diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
index 0f054f1d940c..296f65c041a1 100644
--- a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
+++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir
@@ -98,8 +98,8 @@ name:            defaultMapping
 legalized:       true
 # CHECK-LABEL: name: defaultMapping
 # CHECK:      registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -119,8 +119,8 @@ name:            defaultMappingVector
 legalized:       true
 # CHECK-LABEL: name: defaultMappingVector
 # CHECK:      registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -141,10 +141,10 @@ name:            defaultMapping1Repair
 legalized:       true
 # CHECK-LABEL: name: defaultMapping1Repair
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: fpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
-# CHECK-NEXT:   - { id: 3, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -166,10 +166,10 @@ name:            defaultMapping2Repairs
 legalized:       true
 # CHECK-LABEL: name: defaultMapping2Repairs
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: fpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
-# CHECK-NEXT:   - { id: 3, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -193,9 +193,9 @@ name:            defaultMappingDefRepair
 legalized:       true
 # CHECK-LABEL: name: defaultMappingDefRepair
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: fpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: fpr }
@@ -215,11 +215,11 @@ name:            phiPropagation
 legalized:       true
 tracksRegLiveness:   true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr32 }
-# CHECK-NEXT:   - { id: 1, class: gpr64sp }
-# CHECK-NEXT:   - { id: 2, class: gpr32 }
-# CHECK-NEXT:   - { id: 3, class: gpr }
-# CHECK-NEXT:   - { id: 4, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr32 }
   - { id: 1, class: gpr64sp }
@@ -254,10 +254,10 @@ name:            defaultMappingUseRepairPhysReg
 legalized:       true
 # CHECK-LABEL: name: defaultMappingUseRepairPhysReg
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: fpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
-# CHECK-NEXT:   - { id: 3, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -280,8 +280,8 @@ name:            defaultMappingDefRepairPhysReg
 legalized:       true
 # CHECK-LABEL: name: defaultMappingDefRepairPhysReg
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -303,18 +303,18 @@ name:            greedyMappingOr
 legalized:       true
 # CHECK-LABEL: name: greedyMappingOr
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 
 # Fast mode maps vector instruction on FPR.
-# FAST-NEXT:  - { id: 2, class: fpr }
+# FAST-NEXT:  - { id: 2, class: fpr, preferred-register: '' }
 # Fast mode needs two extra copies.
-# FAST-NEXT:  - { id: 3, class: fpr }
-# FAST-NEXT:  - { id: 4, class: fpr }
+# FAST-NEXT:  - { id: 3, class: fpr, preferred-register: '' }
+# FAST-NEXT:  - { id: 4, class: fpr, preferred-register: '' }
 
 # Greedy mode coalesce the computation on the GPR register
 # because it is the cheapest.
-# GREEDY-NEXT:  - { id: 2, class: gpr }
+# GREEDY-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -350,18 +350,18 @@ name:            greedyMappingOrWithConstraints
 legalized:       true
 # CHECK-LABEL: name: greedyMappingOrWithConstraints
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: fpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr, preferred-register: '' }
 
 # Fast mode maps vector instruction on FPR.
 # Fast mode needs two extra copies.
-# FAST-NEXT:  - { id: 3, class: fpr }
-# FAST-NEXT:  - { id: 4, class: fpr }
+# FAST-NEXT:  - { id: 3, class: fpr, preferred-register: '' }
+# FAST-NEXT:  - { id: 4, class: fpr, preferred-register: '' }
 
 # Greedy mode coalesce the computation on the GPR register because it
 # is the cheapest, but will need one extra copy to materialize %2 into a FPR.
-# GREEDY-NEXT:  - { id: 3, class: gpr }
+# GREEDY-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -396,8 +396,8 @@ body: |
 name:            ignoreTargetSpecificInst
 legalized:       true
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr64 }
   - { id: 1, class: gpr64 }
@@ -434,8 +434,8 @@ name:            bitcast_s32_gpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -457,8 +457,8 @@ name:            bitcast_s32_fpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr }
-# CHECK-NEXT:  - { id: 1, class: fpr }
+# CHECK-NEXT:  - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -480,9 +480,9 @@ name:            bitcast_s32_gpr_fpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# FAST-NEXT:  - { id: 1, class: fpr }
-# GREEDY-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# FAST-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
+# GREEDY-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -504,9 +504,9 @@ name:            bitcast_s32_fpr_gpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr }
-# FAST-NEXT:  - { id: 1, class: gpr }
-# GREEDY-NEXT:  - { id: 1, class: fpr }
+# CHECK-NEXT:  - { id: 0, class: fpr, preferred-register: '' }
+# FAST-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# GREEDY-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -528,8 +528,8 @@ name:            bitcast_s64_gpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -551,8 +551,8 @@ name:            bitcast_s64_fpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr }
-# CHECK-NEXT:  - { id: 1, class: fpr }
+# CHECK-NEXT:  - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -574,9 +574,9 @@ name:            bitcast_s64_gpr_fpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# FAST-NEXT:  - { id: 1, class: fpr }
-# GREEDY-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# FAST-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
+# GREEDY-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -597,9 +597,9 @@ name:            bitcast_s64_fpr_gpr
 legalized:       true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr }
-# FAST-NEXT:  - { id: 1, class: gpr }
-# GREEDY-NEXT:  - { id: 1, class: fpr }
+# CHECK-NEXT:  - { id: 0, class: fpr, preferred-register: '' }
+# FAST-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# GREEDY-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -624,15 +624,15 @@ name:            greedyWithChainOfComputation
 legalized:       true
 
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# FAST-NEXT:   - { id: 2, class: fpr }
-# FAST-NEXT:   - { id: 3, class: fpr }
-# FAST-NEXT:   - { id: 4, class: fpr }
-# GREEDY-NEXT: - { id: 2, class: gpr }
-# GREEDY-NEXT: - { id: 3, class: gpr }
-# GREEDY-NEXT: - { id: 4, class: gpr }
-# CHECK-NEXT:  - { id: 5, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT:   - { id: 2, class: fpr, preferred-register: '' }
+# FAST-NEXT:   - { id: 3, class: fpr, preferred-register: '' }
+# FAST-NEXT:   - { id: 4, class: fpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 4, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 5, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -674,11 +674,11 @@ name:            floatingPointLoad
 legalized:       true
 
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: fpr }
-# CHECK-NEXT:   - { id: 3, class: fpr }
-# CHECK-NEXT:   - { id: 4, class: fpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -716,11 +716,11 @@ name:            floatingPointStore
 legalized:       true
 
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: fpr }
-# CHECK-NEXT:   - { id: 3, class: fpr }
-# CHECK-NEXT:   - { id: 4, class: fpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: fpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
index 4e6b9cad4c3d..38a90bbfbbd9 100644
--- a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
@@ -3,8 +3,8 @@
 
 ; CHECK-LABEL: name: test_stack_slots
 ; CHECK: fixedStack:
-; CHECK-DAG:  - { id: [[STACK0:[0-9]+]], offset: 0, size: 1
-; CHECK-DAG:  - { id: [[STACK8:[0-9]+]], offset: 1, size: 1
+; CHECK-DAG:  - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1,
+; CHECK-DAG:  - { id: [[STACK8:[0-9]+]], type: default, offset: 1, size: 1,
 ; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ; CHECK: [[LHS:%[0-9]+]](s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0)
 ; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
index 44705a9c9f65..e923a0b2847f 100644
--- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -35,7 +35,7 @@ define void @test_simple_arg(i32 %in) {
 ; CHECK-LABEL: name: test_indirect_call
 ; CHECK: registers:
 ; Make sure the register feeding the indirect call is properly constrained.
-; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64 }
+; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64, preferred-register: '' }
 ; CHECK: %[[FUNC]](p0) = COPY %x0
 ; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def %lr, implicit %sp
 ; CHECK: RET_ReallyLR
@@ -165,9 +165,9 @@ define zeroext i8 @test_abi_zext_ret(i8* %addr) {
 
 ; CHECK-LABEL: name: test_stack_slots
 ; CHECK: fixedStack:
-; CHECK-DAG:  - { id: [[STACK0:[0-9]+]], offset: 0, size: 8
-; CHECK-DAG:  - { id: [[STACK8:[0-9]+]], offset: 8, size: 8
-; CHECK-DAG:  - { id: [[STACK16:[0-9]+]], offset: 16, size: 8
+; CHECK-DAG:  - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8,
+; CHECK-DAG:  - { id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8,
+; CHECK-DAG:  - { id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 8,
 ; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ; CHECK: [[LHS:%[0-9]+]](s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
 ; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
@@ -208,7 +208,7 @@ define void @test_call_stack() {
 
 ; CHECK-LABEL: name: test_mem_i1
 ; CHECK: fixedStack:
-; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false }
+; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
 ; CHECK: [[ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]]
 ; CHECK: {{%[0-9]+}}(s1) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 0)
 define void @test_mem_i1([8 x i64], i1 %in) {
diff --git a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll
index e01bd2a9f7c8..e832ba953241 100644
--- a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll
+++ b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll
@@ -3,8 +3,8 @@
 
 ; CHECK-LABEL: name: debug_declare
 ; CHECK: stack:
-; CHECK:    - { id: {{.*}}, name: in.addr, offset: {{.*}}, size: {{.*}}, alignment: {{.*}}, di-variable: '!11',
-; CHECK-NEXT:   di-expression: '!12', di-location: '!13' }
+; CHECK:    - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}}, 
+; CHECK-NEXT: callee-saved-register: '', di-variable: '!11', di-expression: '!12',
 ; CHECK: DBG_VALUE debug-use %0(s32), debug-use _, !11, !12, debug-location !13
 define void @debug_declare(i32 %in) #0 !dbg !7 {
 entry:
diff --git a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir
index ea8a77ca3917..28c926b5d062 100644
--- a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir
+++ b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir
@@ -35,15 +35,15 @@ regBankSelected: true
 tracksRegLiveness: true
 registers:
 # CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: fpr }
-# CHECK-NEXT: - { id: 3, class: fpr }
-# CHECK-NEXT: - { id: 4, class: fpr }
-# CHECK-NEXT: - { id: 5, class: fpr }
+# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' }
 # The localizer will create two new values to materialize the constants.
-# OPTNONE-NEXT:  - { id: 6, class: fpr }
-# OPTNONE-NEXT:  - { id: 7, class: fpr }
+# OPTNONE-NEXT:  - { id: 6, class: fpr, preferred-register: '' }
+# OPTNONE-NEXT:  - { id: 7, class: fpr, preferred-register: '' }
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
   - { id: 2, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/localizer.mir b/test/CodeGen/AArch64/GlobalISel/localizer.mir
index 5bf8dac79860..afe2c13f025d 100644
--- a/test/CodeGen/AArch64/GlobalISel/localizer.mir
+++ b/test/CodeGen/AArch64/GlobalISel/localizer.mir
@@ -44,11 +44,11 @@ regBankSelected: true
 
 # CHECK:      registers:
 # Existing registers should be left untouched
-# CHECK:  - { id: 0, class: gpr }
-#CHECK-NEXT:  - { id: 1, class: gpr }
-#CHECK-NEXT:  - { id: 2, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
 # The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT:  - { id: 3, class: gpr }
+#CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: gpr }
@@ -82,11 +82,11 @@ regBankSelected: true
 
 # CHECK:      registers:
 # Existing registers should be left untouched
-# CHECK:  - { id: 0, class: gpr }
-#CHECK-NEXT:  - { id: 1, class: gpr }
-#CHECK-NEXT:  - { id: 2, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
 # The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT:  - { id: 3, class: gpr }
+#CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: gpr }
@@ -120,13 +120,13 @@ tracksRegLiveness: true
 
 # CHECK:      registers:
 # Existing registers should be left untouched
-# CHECK:  - { id: 0, class: gpr }
-#CHECK-NEXT:  - { id: 1, class: gpr }
-#CHECK-NEXT:  - { id: 2, class: gpr }
-#CHECK-NEXT:  - { id: 3, class: gpr }
-#CHECK-NEXT:  - { id: 4, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 4, class: gpr, preferred-register: '' }
 # The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT:  - { id: 5, class: gpr }
+#CHECK-NEXT:  - { id: 5, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: gpr }
@@ -168,14 +168,14 @@ tracksRegLiveness: true
 
 # CHECK:      registers:
 # Existing registers should be left untouched
-# CHECK:  - { id: 0, class: gpr }
-#CHECK-NEXT:  - { id: 1, class: gpr }
-#CHECK-NEXT:  - { id: 2, class: gpr }
-#CHECK-NEXT:  - { id: 3, class: gpr }
-#CHECK-NEXT:  - { id: 4, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 4, class: gpr, preferred-register: '' }
 # The newly created regs should be on the same regbank/regclass as its origin.
-#CHECK-NEXT:  - { id: 5, class: gpr }
-#CHECK-NEXT:  - { id: 6, class: gpr }
+#CHECK-NEXT:  - { id: 5, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 6, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: gpr }
@@ -219,14 +219,14 @@ tracksRegLiveness: true
 
 # CHECK:      registers:
 # Existing registers should be left untouched
-# CHECK:  - { id: 0, class: gpr }
-#CHECK-NEXT:  - { id: 1, class: gpr }
-#CHECK-NEXT:  - { id: 2, class: gpr }
-#CHECK-NEXT:  - { id: 3, class: gpr }
-#CHECK-NEXT:  - { id: 4, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 4, class: gpr, preferred-register: '' }
 # The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT:  - { id: 5, class: gpr }
-#CHECK-NEXT:  - { id: 6, class: gpr }
+#CHECK-NEXT:  - { id: 5, class: gpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 6, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: gpr }
@@ -270,14 +270,14 @@ tracksRegLiveness: true
 
 # CHECK:      registers:
 # Existing registers should be left untouched
-# CHECK:  - { id: 0, class: fpr }
-#CHECK-NEXT:  - { id: 1, class: fpr }
-#CHECK-NEXT:  - { id: 2, class: fpr }
-#CHECK-NEXT:  - { id: 3, class: fpr }
-#CHECK-NEXT:  - { id: 4, class: fpr }
+# CHECK:  - { id: 0, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 2, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 3, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 4, class: fpr, preferred-register: '' }
 # The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT:  - { id: 5, class: fpr }
-#CHECK-NEXT:  - { id: 6, class: fpr }
+#CHECK-NEXT:  - { id: 5, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 6, class: fpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: fpr }
@@ -323,12 +323,12 @@ tracksRegLiveness: true
 
 # CHECK:      registers:
 # Existing registers should be left untouched
-# CHECK:  - { id: 0, class: fpr }
-#CHECK-NEXT:  - { id: 1, class: fpr }
-#CHECK-NEXT:  - { id: 2, class: fpr }
-#CHECK-NEXT:  - { id: 3, class: fpr }
+# CHECK:  - { id: 0, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 1, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 2, class: fpr, preferred-register: '' }
+#CHECK-NEXT:  - { id: 3, class: fpr, preferred-register: '' }
 # The newly created reg should be on the same regbank/regclass as its origin.
-#CHECK-NEXT:  - { id: 4, class: fpr }
+#CHECK-NEXT:  - { id: 4, class: fpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir
index 73d4d2054729..c8a8266e8b28 100644
--- a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir
+++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir
@@ -32,7 +32,7 @@
 name:            test_dbg_value
 legalized:       true
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
 body: |
   bb.0:
     liveins: %w0
diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
index 14ee40c941bf..b8468d8cf55f 100644
--- a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
+++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir
@@ -73,8 +73,8 @@
 name:            test_add_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -92,8 +92,8 @@ body: |
 name:            test_add_v4s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -111,8 +111,8 @@ body: |
 name:            test_sub_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -130,8 +130,8 @@ body: |
 name:            test_sub_v4s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -149,8 +149,8 @@ body: |
 name:            test_mul_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -168,8 +168,8 @@ body: |
 name:            test_mul_v4s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -187,8 +187,8 @@ body: |
 name:            test_and_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -206,8 +206,8 @@ body: |
 name:            test_and_v4s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -225,8 +225,8 @@ body: |
 name:            test_or_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -244,8 +244,8 @@ body: |
 name:            test_or_v4s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -263,8 +263,8 @@ body: |
 name:            test_xor_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -282,8 +282,8 @@ body: |
 name:            test_xor_v4s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -301,8 +301,8 @@ body: |
 name:            test_shl_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -320,8 +320,8 @@ body: |
 name:            test_shl_v4s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -339,8 +339,8 @@ body: |
 name:            test_lshr_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -358,8 +358,8 @@ body: |
 name:            test_ashr_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -377,8 +377,8 @@ body: |
 name:            test_sdiv_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -396,8 +396,8 @@ body: |
 name:            test_udiv_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -415,8 +415,8 @@ body: |
 name:            test_anyext_s64_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -434,8 +434,8 @@ body: |
 name:            test_sext_s64_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -453,8 +453,8 @@ body: |
 name:            test_zext_s64_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -472,8 +472,8 @@ body: |
 name:            test_trunc_s32_s64
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -491,7 +491,7 @@ body: |
 name:            test_constant_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
 body: |
@@ -505,7 +505,7 @@ body: |
 name:            test_constant_p0
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
 body: |
@@ -519,8 +519,8 @@ body: |
 name:            test_icmp_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -538,8 +538,8 @@ body: |
 name:            test_icmp_p0
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -557,7 +557,7 @@ body: |
 name:            test_frame_index_p0
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
 stack:
@@ -573,8 +573,8 @@ body: |
 name:            test_ptrtoint_s64_p0
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -592,8 +592,8 @@ body: |
 name:            test_inttoptr_p0_s64
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -611,8 +611,8 @@ body: |
 name:            test_load_s32_p0
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -630,8 +630,8 @@ body: |
 name:            test_store_s32_p0
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -651,8 +651,8 @@ body: |
 name:            test_fadd_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -670,8 +670,8 @@ body: |
 name:            test_fsub_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -689,8 +689,8 @@ body: |
 name:            test_fmul_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -708,8 +708,8 @@ body: |
 name:            test_fdiv_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -727,8 +727,8 @@ body: |
 name:            test_fpext_s64_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -746,8 +746,8 @@ body: |
 name:            test_fptrunc_s32_s64
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -765,7 +765,7 @@ body: |
 name:            test_fconstant_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
 body: |
@@ -779,8 +779,8 @@ body: |
 name:            test_fcmp_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -798,8 +798,8 @@ body: |
 name:            test_sitofp_s64_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -817,8 +817,8 @@ body: |
 name:            test_uitofp_s32_s64
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -836,8 +836,8 @@ body: |
 name:            test_fptosi_s64_s32
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -855,8 +855,8 @@ body: |
 name:            test_fptoui_s32_s64
 legalized:       true
 # CHECK: registers:
-# CHECK:   - { id: 0, class: fpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: fpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-binop.mir b/test/CodeGen/AArch64/GlobalISel/select-binop.mir
index 8ae2e1b2eb7d..70cda516d5f1 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-binop.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-binop.mir
@@ -64,9 +64,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -94,9 +94,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -123,9 +123,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr32sp }
+# CHECK-NEXT:  - { id: 0, class: gpr32sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32sp, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -151,9 +151,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr64sp }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64sp, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -179,9 +179,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr32sp }
+# CHECK-NEXT:  - { id: 0, class: gpr32sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32sp, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -213,9 +213,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -243,9 +243,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -273,9 +273,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -303,9 +303,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -333,9 +333,9 @@ legalized:       true
 regBankSelected: true
 #
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
-# CHECK-NEXT:  - { id: 2, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -365,9 +365,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -395,9 +395,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -425,9 +425,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -455,9 +455,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -485,9 +485,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -515,9 +515,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -545,9 +545,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -575,9 +575,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -606,9 +606,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -636,9 +636,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -666,10 +666,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
-# CHECK-NEXT:  - { id: 3, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr64, preferred-register: '' }
 
 # CHECK:  body:
 # CHECK:    %0 = COPY %x0
@@ -696,9 +696,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -726,9 +726,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -756,9 +756,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -786,9 +786,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -816,9 +816,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
-# CHECK-NEXT:  - { id: 2, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -845,9 +845,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
-# CHECK-NEXT:  - { id: 2, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -874,9 +874,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
-# CHECK-NEXT:  - { id: 2, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -903,9 +903,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
-# CHECK-NEXT:  - { id: 2, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -932,9 +932,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
-# CHECK-NEXT:  - { id: 2, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -961,9 +961,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
-# CHECK-NEXT:  - { id: 2, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -990,9 +990,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
-# CHECK-NEXT:  - { id: 2, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -1019,9 +1019,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
-# CHECK-NEXT:  - { id: 2, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
index 5ca63dbc214d..d871a80661a8 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
@@ -19,8 +19,8 @@ name:            bitcast_s32_gpr
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32all }
-# CHECK-NEXT:  - { id: 1, class: gpr32all }
+# CHECK-NEXT:  - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32all, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -44,8 +44,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -69,8 +69,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32all }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -94,8 +94,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32all }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32all, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -119,8 +119,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64all }
-# CHECK-NEXT:  - { id: 1, class: gpr64all }
+# CHECK-NEXT:  - { id: 0, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64all, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -144,8 +144,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -169,8 +169,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64all }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -193,8 +193,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64all }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64all, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
index fbb11a1c7a4c..34c3da3a5369 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
@@ -34,8 +34,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK: - { id: 0, class: fpr64 }
-# CHECK: - { id: 1, class: fpr32 }
+# CHECK: - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -59,8 +59,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK: - { id: 0, class: fpr32 }
-# CHECK: - { id: 1, class: fpr64 }
+# CHECK: - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
@@ -84,8 +84,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -109,8 +109,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -134,8 +134,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -159,8 +159,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -184,8 +184,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -209,8 +209,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -234,8 +234,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -259,8 +259,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -284,8 +284,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -309,8 +309,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -334,8 +334,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -359,8 +359,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -384,8 +384,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -409,8 +409,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -434,8 +434,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -459,8 +459,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
index 2ba8b7366252..5f29f8b62fab 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
@@ -24,9 +24,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32all }
-# CHECK-NEXT:  - { id: 1, class: gpr64all }
-# CHECK-NEXT:  - { id: 2, class: gpr64all }
+# CHECK-NEXT:  - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64all, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -51,8 +51,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32all }
-# CHECK-NEXT:  - { id: 1, class: gpr32all }
+# CHECK-NEXT:  - { id: 0, class: gpr32all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32all, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -76,9 +76,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -103,8 +103,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -128,8 +128,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -153,8 +153,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -178,9 +178,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -205,8 +205,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -230,8 +230,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -255,8 +255,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir
index 6537408f6d98..b71a9a3d731e 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir
@@ -18,8 +18,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64all }
-# CHECK-NEXT:  - { id: 1, class: gpr64all }
+# CHECK-NEXT:  - { id: 0, class: gpr64all, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64all, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -41,8 +41,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -64,8 +64,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -87,8 +87,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -110,8 +110,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -133,8 +133,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-load.mir b/test/CodeGen/AArch64/GlobalISel/select-load.mir
index 9188e2b0c0fc..d00b98d148be 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-load.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-load.mir
@@ -37,8 +37,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -62,8 +62,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -87,8 +87,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -112,8 +112,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -137,8 +137,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -165,10 +165,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -197,10 +197,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -229,10 +229,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -261,10 +261,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -293,8 +293,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -318,8 +318,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -343,8 +343,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr16 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -368,8 +368,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr8 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -393,10 +393,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -425,10 +425,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -457,10 +457,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: fpr16 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: fpr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -489,10 +489,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: fpr8 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: fpr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir
index 7d5b43bc16d5..cd7a79f17d95 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir
@@ -13,13 +13,13 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
-# CHECK-NEXT:  - { id: 3, class: gpr }
-# CHECK-NEXT:  - { id: 4, class: gpr }
-# CHECK-NEXT:  - { id: 5, class: gpr }
-# CHECK-NEXT:  - { id: 6, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 4, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 5, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 6, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-store.mir b/test/CodeGen/AArch64/GlobalISel/select-store.mir
index 9b8f5c566ce0..536e236c2738 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -35,8 +35,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -62,8 +62,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -89,8 +89,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -116,8 +116,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -143,8 +143,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -169,8 +169,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -195,8 +195,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -223,10 +223,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -255,10 +255,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -287,10 +287,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -319,10 +319,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -351,8 +351,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -378,8 +378,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -405,10 +405,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
@@ -437,10 +437,10 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
-# CHECK-NEXT:  - { id: 1, class: fpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
index fc3546e777f7..5559e2d3a0d1 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir
@@ -15,8 +15,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -39,8 +39,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -63,8 +63,8 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select-xor.mir b/test/CodeGen/AArch64/GlobalISel/select-xor.mir
index e787849c8d1b..7190fda15b8e 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-xor.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-xor.mir
@@ -20,9 +20,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -50,9 +50,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr64 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -81,9 +81,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -110,9 +110,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64 }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -139,9 +139,9 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/select.mir b/test/CodeGen/AArch64/GlobalISel/select.mir
index 8bffa085fdca..5e52bc761a84 100644
--- a/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -35,7 +35,7 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr64sp }
+# CHECK-NEXT:  - { id: 0, class: gpr64sp, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 
@@ -132,12 +132,12 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr64 }
-# CHECK-NEXT:  - { id: 3, class: gpr32 }
-# CHECK-NEXT:  - { id: 4, class: gpr64 }
-# CHECK-NEXT:  - { id: 5, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 4, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 5, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -180,12 +180,12 @@ legalized:       true
 regBankSelected: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: fpr64 }
-# CHECK-NEXT:  - { id: 3, class: gpr32 }
-# CHECK-NEXT:  - { id: 4, class: gpr32 }
-# CHECK-NEXT:  - { id: 5, class: gpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 4, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 5, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -223,9 +223,9 @@ regBankSelected: true
 tracksRegLiveness: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: fpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: fpr32 }
+# CHECK-NEXT:  - { id: 0, class: fpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
@@ -260,16 +260,16 @@ regBankSelected: true
 tracksRegLiveness: true
 
 # CHECK:      registers:
-# CHECK-NEXT:  - { id: 0, class: gpr32 }
-# CHECK-NEXT:  - { id: 1, class: gpr32 }
-# CHECK-NEXT:  - { id: 2, class: gpr32 }
-# CHECK-NEXT:  - { id: 3, class: gpr32 }
-# CHECK-NEXT:  - { id: 4, class: gpr64 }
-# CHECK-NEXT:  - { id: 5, class: gpr64 }
-# CHECK-NEXT:  - { id: 6, class: gpr64 }
-# CHECK-NEXT:  - { id: 7, class: gpr64 }
-# CHECK-NEXT:  - { id: 8, class: gpr64 }
-# CHECK-NEXT:  - { id: 9, class: gpr64 }
+# CHECK-NEXT:  - { id: 0, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr32, preferred-register: '' }
+# CHECK-NEXT:  - { id: 4, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 5, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 6, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 7, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 8, class: gpr64, preferred-register: '' }
+# CHECK-NEXT:  - { id: 9, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll
index 3bd56fa4cebc..af0ab57b0b9f 100644
--- a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll
@@ -4,7 +4,7 @@ define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64,
                                    i32, ...) {
 ; CHECK-LABEL: name: test_varargs_sentinel
 ; CHECK: fixedStack:
-; CHECK:   - { id: [[VARARGS_SLOT:[0-9]+]], offset: 8
+; CHECK:   - { id: [[VARARGS_SLOT:[0-9]+]], type: default, offset: 8
 ; CHECK: body:
 ; CHECK:   [[LIST:%[0-9]+]] = COPY %x0
 ; CHECK:   [[VARARGS_AREA:%[0-9]+]] = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
new file mode 100644
index 000000000000..16a02de79a91
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
@@ -0,0 +1,131 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s
+
+; Test fptosi
+define i32 @fptosi_wh(half %a) nounwind ssp {
+entry:
+; CHECK-LABEL: fptosi_wh
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzs [[REG:w[0-9]+]], s1
+; CHECK: mov w0, [[REG]]
+  %conv = fptosi half %a to i32
+  ret i32 %conv
+}
+
+; Test fptoui
+define i32 @fptoui_swh(half %a) nounwind ssp {
+entry:
+; CHECK-LABEL: fptoui_swh
+; CHECK: fcvt s1, h0
+; CHECK: fcvtzu [[REG:w[0-9]+]], s1
+; CHECK: mov w0, [[REG]]
+  %conv = fptoui half %a to i32
+  ret i32 %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i1(i1 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i1
+; CHECK: sbfx w0, w0, #0, #1
+; CHECK: scvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = sitofp i1 %a to half
+  ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i8(i8 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i8
+; CHECK: sxtb w0, w0
+; CHECK: scvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = sitofp i8 %a to half
+  ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i16(i16 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i16
+; CHECK: sxth w0, w0
+; CHECK: scvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = sitofp i16 %a to half
+  ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hw_i32(i32 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hw_i32
+; CHECK: scvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = sitofp i32 %a to half
+  ret half %conv
+}
+
+; Test sitofp
+define half @sitofp_hx(i64 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: sitofp_hx
+; CHECK: scvtf s0, x0
+; CHECK: fcvt  h0, s0
+  %conv = sitofp i64 %a to half
+  ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i1(i1 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i1
+; CHECK: and w0, w0, #0x1
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = uitofp i1 %a to half
+  ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i8(i8 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i8
+; CHECK: and w0, w0, #0xff
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = uitofp i8 %a to half
+  ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i16(i16 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i16
+; CHECK: and w0, w0, #0xffff
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = uitofp i16 %a to half
+  ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hw_i32(i32 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hw_i32
+; CHECK: ucvtf s0, w0
+; CHECK: fcvt  h0, s0
+  %conv = uitofp i32 %a to half
+  ret half %conv
+}
+
+; Test uitofp
+define half @uitofp_hx(i64 %a) nounwind ssp {
+entry:
+; CHECK-LABEL: uitofp_hx
+; CHECK: ucvtf s0, x0
+; CHECK: fcvt  h0, s0
+  %conv = uitofp i64 %a to half
+  ret half %conv
+}
+
+
diff --git a/test/CodeGen/AArch64/spill-undef.mir b/test/CodeGen/AArch64/spill-undef.mir
new file mode 100644
index 000000000000..4294df286bd3
--- /dev/null
+++ b/test/CodeGen/AArch64/spill-undef.mir
@@ -0,0 +1,67 @@
+# RUN: llc %s -run-pass greedy -o - | FileCheck %s
+# Check that we don't insert spill code for undef values.
+# Uninitialized memory for them is fine.
+# PR33311
+--- |
+  ; ModuleID = 'stuff.ll'
+  target triple = "aarch64--"
+  
+  @g = external global i32
+  
+  define void @foobar() {
+    ret void
+  }
+  
+...
+---
+name:            foobar
+alignment:       2
+tracksRegLiveness: true
+registers:       
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr32 }
+  - { id: 2, class: gpr32all }
+  - { id: 3, class: gpr32 }
+  - { id: 4, class: gpr64common }
+  - { id: 5, class: gpr32 }
+  - { id: 6, class: gpr64common }
+  - { id: 7, class: gpr32 }
+  - { id: 8, class: gpr32 }
+  - { id: 9, class: gpr64 }
+body:             |
+  bb.0:
+    liveins: %x0
+    successors: %bb.1, %bb.2
+
+    ; %8 is going to be spilled.
+    ; But on that path, we don't care about its value.
+    ; Emit a simple KILL instruction instead of an
+    ; actual spill.
+    ; CHECK: [[UNDEF:%[0-9]+]] = IMPLICIT_DEF
+    ; CHECK-NEXT: KILL [[UNDEF]]
+    %8 = IMPLICIT_DEF
+    ; %9 is going to be spilled.
+    ; But it is only partially undef.
+    ; Make sure we spill it properly
+    ; CHECK: [[NINE:%[0-9]+]] = COPY %x0
+    ; CHECK: [[NINE]].sub_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: STRXui [[NINE]]
+    %9 = COPY %x0
+    %9.sub_32 = IMPLICIT_DEF
+    CBNZW %wzr, %bb.2
+    B %bb.1
+  
+  bb.1:
+    %4 = ADRP target-flags(aarch64-page) @g
+    %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g)
+    INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr
+  
+  bb.2:
+    INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr
+    %6 = ADRP target-flags(aarch64-page) @g
+    %w0 = MOVi32imm 42
+    STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 4 into @g)
+    STRXui %9, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 8 into @g)
+    RET_ReallyLR implicit killed %w0
+
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
new file mode 100644
index 000000000000..ebd473d769b3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir
@@ -0,0 +1,24 @@
+# RUN: llc -O0 -march=amdgcn -mcpu=fiji  -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+  define void @test_icmp() {
+  entry:
+    ret void
+  }
+...
+
+---
+name:            test_icmp
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %vgpr0
+    %0(s32) = G_CONSTANT i32 0
+    %1(s32) = COPY %vgpr0
+
+    ; CHECK: %2(s1) = G_ICMP intpred(ne), %0(s32), %1
+    %2(s1) = G_ICMP intpred(ne), %0, %1
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
new file mode 100644
index 000000000000..d11130936bd9
--- /dev/null
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir
@@ -0,0 +1,28 @@
+# RUN: llc -O0 -march=amdgcn -mcpu=fiji  -run-pass=legalizer -global-isel %s -o - | FileCheck %s
+
+--- |
+  define void @test_select() { ret void }
+...
+
+---
+name:            test_select
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+body: |
+  bb.0:
+    liveins: %vgpr0
+    %0(s32) = G_CONSTANT i32 0
+    %1(s32) = COPY %vgpr0
+
+    %2(s1) = G_ICMP intpred(ne), %0, %1
+    %3(s32) = G_CONSTANT i32 1
+    %4(s32) = G_CONSTANT i32 2
+    ; CHECK: %5(s32) = G_SELECT %2(s1), %3, %4
+    %5(s32) = G_SELECT %2, %3, %4
+
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
index 3496b1ab71fe..902f1e6c6725 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -24,8 +24,8 @@ legalized: true
 
 # CHECK-LABEL: name: load_constant
 # CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: sgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: sgpr, preferred-register: '' }
 
 body: |
   bb.0:
@@ -40,8 +40,8 @@ legalized: true
 
 # CHECK-LABEL: name: load_global_uniform
 # CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: sgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: sgpr, preferred-register: '' }
 
 body: |
   bb.0:
@@ -56,9 +56,9 @@ legalized: true
 
 # CHECK-LABEL: name: load_global_non_uniform
 # CHECK: registers:
-# CHECK: - { id: 0, class: sgpr }
-# CHECK: - { id: 1, class: vgpr }
-# CHECK: - { id: 2, class: vgpr }
+# CHECK: - { id: 0, class: sgpr, preferred-register: '' }
+# CHECK: - { id: 1, class: vgpr, preferred-register: '' }
+# CHECK: - { id: 2, class: vgpr, preferred-register: '' }
 
 
 body: |
diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll
index e5e2d436deb0..76f724c2b90b 100644
--- a/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out
 
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}}
 ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -84,7 +84,7 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou
 
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}}
 ; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -101,7 +101,7 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)*
 ; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1
 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
 ; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v[[SCONST]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]]
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -140,7 +140,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
 
 ; VI-NOT: v_add_u16
 ; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80
-; VI: v_add_u16_sdwa v{{[0-9]+}}, v[[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NOT: v_add_u16
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll
index 7f424ef2a147..dd96e6264418 100644
--- a/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -9,7 +9,7 @@
 ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[VLHS]]
 
 ; VI: v_ashrrev_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; CI: v_ashrrev_i32_e32
 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/branch-relax-spill.ll b/test/CodeGen/AMDGPU/branch-relax-spill.ll
index ede15559c4ff..db476c21636f 100644
--- a/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -7,110 +7,110 @@
 
 define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 {
 entry:
-  %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={SGPR0}"() #0
-  %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={SGPR1}"() #0
-  %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={SGPR2}"() #0
-  %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={SGPR3}"() #0
-  %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={SGPR4}"() #0
-  %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={SGPR5}"() #0
-  %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={SGPR6}"() #0
-  %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={SGPR7}"() #0
-  %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={SGPR8}"() #0
-  %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={SGPR9}"() #0
-  %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={SGPR10}"() #0
-  %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={SGPR11}"() #0
-  %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={SGPR12}"() #0
-  %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={SGPR13}"() #0
-  %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={SGPR14}"() #0
-  %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={SGPR15}"() #0
-  %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={SGPR16}"() #0
-  %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={SGPR17}"() #0
-  %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={SGPR18}"() #0
-  %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={SGPR19}"() #0
-  %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={SGPR20}"() #0
-  %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={SGPR21}"() #0
-  %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={SGPR22}"() #0
-  %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={SGPR23}"() #0
-  %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={SGPR24}"() #0
-  %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={SGPR25}"() #0
-  %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={SGPR26}"() #0
-  %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={SGPR27}"() #0
-  %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={SGPR28}"() #0
-  %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={SGPR29}"() #0
-  %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={SGPR30}"() #0
-  %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={SGPR31}"() #0
-  %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={SGPR32}"() #0
-  %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={SGPR33}"() #0
-  %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={SGPR34}"() #0
-  %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={SGPR35}"() #0
-  %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={SGPR36}"() #0
-  %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={SGPR37}"() #0
-  %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={SGPR38}"() #0
-  %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={SGPR39}"() #0
-  %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={SGPR40}"() #0
-  %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={SGPR41}"() #0
-  %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={SGPR42}"() #0
-  %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={SGPR43}"() #0
-  %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={SGPR44}"() #0
-  %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={SGPR45}"() #0
-  %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={SGPR46}"() #0
-  %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={SGPR47}"() #0
-  %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={SGPR48}"() #0
-  %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={SGPR49}"() #0
-  %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={SGPR50}"() #0
-  %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={SGPR51}"() #0
-  %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={SGPR52}"() #0
-  %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={SGPR53}"() #0
-  %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={SGPR54}"() #0
-  %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={SGPR55}"() #0
-  %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={SGPR56}"() #0
-  %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={SGPR57}"() #0
-  %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={SGPR58}"() #0
-  %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={SGPR59}"() #0
-  %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={SGPR60}"() #0
-  %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={SGPR61}"() #0
-  %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={SGPR62}"() #0
-  %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={SGPR63}"() #0
-  %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={SGPR64}"() #0
-  %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={SGPR65}"() #0
-  %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={SGPR66}"() #0
-  %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={SGPR67}"() #0
-  %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={SGPR68}"() #0
-  %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={SGPR69}"() #0
-  %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={SGPR70}"() #0
-  %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={SGPR71}"() #0
-  %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={SGPR72}"() #0
-  %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={SGPR73}"() #0
-  %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={SGPR74}"() #0
-  %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={SGPR75}"() #0
-  %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={SGPR76}"() #0
-  %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={SGPR77}"() #0
-  %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={SGPR78}"() #0
-  %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={SGPR79}"() #0
-  %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={SGPR80}"() #0
-  %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={SGPR81}"() #0
-  %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={SGPR82}"() #0
-  %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={SGPR83}"() #0
-  %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={SGPR84}"() #0
-  %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={SGPR85}"() #0
-  %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={SGPR86}"() #0
-  %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={SGPR87}"() #0
-  %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={SGPR88}"() #0
-  %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={SGPR89}"() #0
-  %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={SGPR90}"() #0
-  %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={SGPR91}"() #0
-  %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={SGPR92}"() #0
-  %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={SGPR93}"() #0
-  %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={SGPR94}"() #0
-  %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={SGPR95}"() #0
-  %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={SGPR96}"() #0
-  %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={SGPR97}"() #0
-  %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={SGPR98}"() #0
-  %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={SGPR99}"() #0
-  %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={SGPR100}"() #0
-  %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={SGPR101}"() #0
-  %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={SGPR102}"() #0
-  %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={SGPR103}"() #0
+  %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0
+  %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0
+  %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0
+  %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0
+  %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0
+  %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0
+  %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0
+  %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0
+  %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0
+  %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0
+  %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0
+  %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0
+  %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0
+  %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0
+  %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0
+  %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0
+  %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0
+  %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0
+  %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0
+  %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0
+  %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0
+  %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0
+  %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0
+  %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0
+  %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0
+  %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0
+  %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0
+  %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0
+  %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0
+  %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0
+  %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0
+  %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0
+  %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0
+  %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0
+  %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0
+  %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0
+  %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0
+  %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0
+  %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0
+  %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0
+  %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0
+  %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0
+  %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0
+  %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0
+  %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0
+  %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0
+  %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0
+  %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0
+  %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0
+  %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0
+  %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0
+  %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0
+  %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0
+  %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0
+  %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0
+  %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0
+  %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0
+  %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0
+  %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0
+  %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0
+  %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0
+  %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0
+  %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0
+  %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0
+  %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0
+  %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0
+  %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0
+  %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0
+  %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0
+  %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0
+  %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0
+  %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0
+  %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0
+  %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0
+  %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0
+  %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0
+  %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0
+  %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0
+  %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0
+  %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0
+  %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0
+  %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0
+  %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0
+  %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0
+  %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0
+  %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0
+  %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0
+  %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0
+  %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0
+  %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0
+  %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0
+  %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0
+  %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0
+  %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0
+  %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0
+  %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0
+  %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0
+  %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0
+  %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0
+  %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0
+  %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0
+  %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0
+  %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={s102}"() #0
+  %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={s103}"() #0
   %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_LO}"() #0
   %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_HI}"() #0
   %cmp = icmp eq i32 %cnd, 0
@@ -126,112 +126,112 @@ bb2: ; 28 bytes
   br label %bb3
 
 bb3:
-  tail call void asm sideeffect "; reg use $0", "{SGPR0}"(i32 %sgpr0) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR1}"(i32 %sgpr1) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR2}"(i32 %sgpr2) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR3}"(i32 %sgpr3) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR4}"(i32 %sgpr4) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR5}"(i32 %sgpr5) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR6}"(i32 %sgpr6) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR7}"(i32 %sgpr7) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR8}"(i32 %sgpr8) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR9}"(i32 %sgpr9) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR10}"(i32 %sgpr10) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR11}"(i32 %sgpr11) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR12}"(i32 %sgpr12) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR13}"(i32 %sgpr13) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR14}"(i32 %sgpr14) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR15}"(i32 %sgpr15) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR16}"(i32 %sgpr16) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR17}"(i32 %sgpr17) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR18}"(i32 %sgpr18) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR19}"(i32 %sgpr19) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR20}"(i32 %sgpr20) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR21}"(i32 %sgpr21) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR22}"(i32 %sgpr22) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR23}"(i32 %sgpr23) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR24}"(i32 %sgpr24) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR25}"(i32 %sgpr25) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR26}"(i32 %sgpr26) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR27}"(i32 %sgpr27) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR28}"(i32 %sgpr28) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR29}"(i32 %sgpr29) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR30}"(i32 %sgpr30) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR31}"(i32 %sgpr31) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR32}"(i32 %sgpr32) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR33}"(i32 %sgpr33) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR34}"(i32 %sgpr34) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR35}"(i32 %sgpr35) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR36}"(i32 %sgpr36) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR37}"(i32 %sgpr37) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR38}"(i32 %sgpr38) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR39}"(i32 %sgpr39) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR40}"(i32 %sgpr40) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR41}"(i32 %sgpr41) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR42}"(i32 %sgpr42) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR43}"(i32 %sgpr43) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR44}"(i32 %sgpr44) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR45}"(i32 %sgpr45) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR46}"(i32 %sgpr46) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR47}"(i32 %sgpr47) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR48}"(i32 %sgpr48) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR49}"(i32 %sgpr49) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR50}"(i32 %sgpr50) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR51}"(i32 %sgpr51) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR52}"(i32 %sgpr52) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR53}"(i32 %sgpr53) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR54}"(i32 %sgpr54) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR55}"(i32 %sgpr55) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR56}"(i32 %sgpr56) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR57}"(i32 %sgpr57) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR58}"(i32 %sgpr58) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR59}"(i32 %sgpr59) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR60}"(i32 %sgpr60) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR61}"(i32 %sgpr61) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR62}"(i32 %sgpr62) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR63}"(i32 %sgpr63) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR64}"(i32 %sgpr64) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR65}"(i32 %sgpr65) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR66}"(i32 %sgpr66) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR67}"(i32 %sgpr67) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR68}"(i32 %sgpr68) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR69}"(i32 %sgpr69) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR70}"(i32 %sgpr70) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR71}"(i32 %sgpr71) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR72}"(i32 %sgpr72) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR73}"(i32 %sgpr73) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR74}"(i32 %sgpr74) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR75}"(i32 %sgpr75) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR76}"(i32 %sgpr76) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR77}"(i32 %sgpr77) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR78}"(i32 %sgpr78) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR79}"(i32 %sgpr79) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR80}"(i32 %sgpr80) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR81}"(i32 %sgpr81) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR82}"(i32 %sgpr82) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR83}"(i32 %sgpr83) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR84}"(i32 %sgpr84) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR85}"(i32 %sgpr85) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR86}"(i32 %sgpr86) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR87}"(i32 %sgpr87) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR88}"(i32 %sgpr88) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR89}"(i32 %sgpr89) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR90}"(i32 %sgpr90) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR91}"(i32 %sgpr91) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR92}"(i32 %sgpr92) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR93}"(i32 %sgpr93) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR94}"(i32 %sgpr94) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR95}"(i32 %sgpr95) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR96}"(i32 %sgpr96) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR97}"(i32 %sgpr97) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR98}"(i32 %sgpr98) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR99}"(i32 %sgpr99) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR100}"(i32 %sgpr100) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR101}"(i32 %sgpr101) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR102}"(i32 %sgpr102) #0
-  tail call void asm sideeffect "; reg use $0", "{SGPR103}"(i32 %sgpr103) #0
-  tail call void asm sideeffect "; reg use $0", "{VCC_LO}"(i32 %vcc_lo) #0
-  tail call void asm sideeffect "; reg use $0", "{VCC_HI}"(i32 %vcc_hi) #0
+  tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0
+  tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0
+  tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0
+  tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0
+  tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0
+  tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0
+  tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0
+  tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0
+  tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0
+  tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0
+  tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0
+  tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0
+  tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0
+  tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0
+  tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0
+  tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0
+  tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0
+  tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0
+  tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0
+  tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0
+  tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0
+  tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0
+  tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0
+  tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0
+  tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0
+  tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0
+  tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0
+  tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0
+  tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0
+  tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0
+  tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0
+  tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0
+  tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0
+  tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0
+  tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0
+  tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0
+  tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0
+  tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0
+  tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0
+  tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0
+  tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0
+  tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0
+  tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0
+  tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0
+  tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0
+  tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0
+  tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0
+  tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0
+  tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0
+  tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0
+  tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0
+  tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0
+  tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0
+  tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0
+  tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0
+  tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0
+  tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0
+  tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0
+  tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0
+  tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0
+  tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0
+  tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0
+  tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0
+  tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0
+  tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0
+  tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0
+  tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0
+  tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0
+  tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0
+  tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0
+  tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0
+  tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0
+  tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0
+  tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0
+  tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0
+  tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0
+  tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0
+  tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0
+  tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0
+  tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0
+  tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0
+  tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0
+  tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0
+  tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0
+  tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0
+  tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0
+  tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0
+  tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0
+  tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0
+  tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0
+  tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0
+  tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0
+  tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0
+  tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0
+  tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0
+  tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0
+  tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0
+  tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0
+  tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0
+  tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0
+  tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0
+  tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0
+  tail call void asm sideeffect "; reg use $0", "{s102}"(i32 %sgpr102) #0
+  tail call void asm sideeffect "; reg use $0", "{s103}"(i32 %sgpr103) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0
+  tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
index fbfd0fbf9308..6ecf75c1acec 100644
--- a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
+++ b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
@@ -24,6 +24,10 @@
     ret void
   }
 
+  define amdgpu_ps void @v_max_reg_imm_f32() #0 {
+    ret void
+  }
+
   attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
 
 ...
@@ -422,3 +426,19 @@ body:             |
     S_ENDPGM
 
 ...
+---
+
+# Pass used to crash with immediate second operand of max
+name:            v_max_reg_imm_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %vgpr0
+
+    %0 = COPY %vgpr0
+    %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit %exec
+
+...
diff --git a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
index 207dfce75f16..13aafc24895d 100644
--- a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
+++ b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll
@@ -2,97 +2,97 @@
 
 ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti
 define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
-  call void asm sideeffect "", "~{VCC}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:103]}" ()
+  call void asm sideeffect "", "~{vcc}" ()
   ret void
 }
 
 ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire
 define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
-  call void asm sideeffect "", "~{VCC}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:103]}" ()
+  call void asm sideeffect "", "~{vcc}" ()
   ret void
 }
 
 ; ERROR: error: scalar registers limit of 104 exceeded (108) in use_too_many_sgprs_bonaire_flat_scr
 define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" ()
-  call void asm sideeffect "", "~{VCC}" ()
-  call void asm sideeffect "", "~{FLAT_SCR}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:103]}" ()
+  call void asm sideeffect "", "~{vcc}" ()
+  call void asm sideeffect "", "~{flat_scratch}" ()
   ret void
 }
 
 ; ERROR: error: scalar registers limit of 96 exceeded (98) in use_too_many_sgprs_iceland
 define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 {
-  call void asm sideeffect "", "~{VCC}" ()
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
+  call void asm sideeffect "", "~{vcc}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
   ret void
 }
 
 ; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji
 define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99}" ()
-  call void asm sideeffect "", "~{SGPR100_SGPR101}" ()
-  call void asm sideeffect "", "~{SGPR102}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{s[96:99]}" ()
+  call void asm sideeffect "", "~{s[100:101]}" ()
+  call void asm sideeffect "", "~{s102}" ()
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/fabs.f16.ll b/test/CodeGen/AMDGPU/fabs.f16.ll
index d4ef7124a334..4e2ec4b3054f 100644
--- a/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) {
 ; VI: flat_load_ushort [[LO:v[0-9]+]]
 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
 ; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[HI]]
-; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[MASK]], [[LO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[LO]], [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, [[FABS_HI]], [[FABS_LO]]
 ; VI: flat_store_dword
 
@@ -60,8 +60,8 @@ define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half
 ; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]]
 
 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
-; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
 ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
 ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -128,7 +128,7 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
 ; CI: v_cvt_f16_f32
 
 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
-; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
 
 ; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]]
diff --git a/test/CodeGen/AMDGPU/fadd.f16.ll b/test/CodeGen/AMDGPU/fadd.f16.ll
index 9b3d2a475a14..08199be144f4 100644
--- a/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -78,7 +78,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_add_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI:     v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
 
 ; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -108,7 +108,7 @@ entry:
 ; SI:  v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_mov_b32_e32 v[[CONST2:[0-9]+]], 0x4000
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
 ; VI:     v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
@@ -137,7 +137,7 @@ entry:
 ; SI:  v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00
-; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[CONST1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[A_V2_F16]]
 ; VI:     v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]]
 
diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 9e8ddd39bbaf..404358f0ecb9 100644
--- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -278,9 +278,9 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa
 }
 
 ; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
-; VI: v_mul_f16_e64 [[REG0:v[0-9]+]], 1.0, {{s[0-9]+}}
-; VI-DAG: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
-; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
+; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00
+; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], [[ONE]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}}
 ; VI-NOT: v_and_b32
 
 ; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}}
diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
index 5705cbc99443..a7664c399fbb 100644
--- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll
+++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -21,7 +21,7 @@
 ; VI-XNACK: ; NumSgprs: 12
 define amdgpu_kernel void @no_vcc_no_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7}"()
+  call void asm sideeffect "", "~{s7}"()
   ret void
 }
 
@@ -35,7 +35,7 @@ entry:
 ; VI-XNACK: ; NumSgprs: 12
 define amdgpu_kernel void @vcc_no_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7},~{VCC}"()
+  call void asm sideeffect "", "~{s7},~{vcc}"()
   ret void
 }
 
@@ -52,7 +52,7 @@ entry:
 ; HSA-VI-XNACK: ; NumSgprs: 14
 define amdgpu_kernel void @no_vcc_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"()
+  call void asm sideeffect "", "~{s7},~{flat_scratch}"()
   ret void
 }
 
@@ -68,7 +68,7 @@ entry:
 ; HSA-VI-XNACK: ; NumSgprs: 14
 define amdgpu_kernel void @vcc_flat() {
 entry:
-  call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()
+  call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"()
   ret void
 }
 
@@ -81,7 +81,7 @@ entry:
 ; VI-XNACK: NumSgprs: 6
 define amdgpu_kernel void @use_flat_scr() #0 {
 entry:
-  call void asm sideeffect "; clobber ", "~{FLAT_SCR}"()
+  call void asm sideeffect "; clobber ", "~{flat_scratch}"()
   ret void
 }
 
@@ -91,7 +91,7 @@ entry:
 ; VI-XNACK: NumSgprs: 6
 define amdgpu_kernel void @use_flat_scr_lo() #0 {
 entry:
-  call void asm sideeffect "; clobber ", "~{FLAT_SCR_LO}"()
+  call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"()
   ret void
 }
 
@@ -101,7 +101,7 @@ entry:
 ; VI-XNACK: NumSgprs: 6
 define amdgpu_kernel void @use_flat_scr_hi() #0 {
 entry:
-  call void asm sideeffect "; clobber ", "~{FLAT_SCR_HI}"()
+  call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"()
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/fmul.f16.ll b/test/CodeGen/AMDGPU/fmul.f16.ll
index 4ef2aa693cf4..cd86409e2038 100644
--- a/test/CodeGen/AMDGPU/fmul.f16.ll
+++ b/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -78,7 +78,7 @@ entry:
 ; SI:  v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_mul_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
 
 ; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -105,7 +105,7 @@ entry:
 ; SI:  v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
 ; SI:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; VI-DAG:  v_mov_b32_e32 v[[CONST4:[0-9]+]], 0x4400
-; VI-DAG:  v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG:  v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG:  v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
 ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
@@ -131,7 +131,7 @@ entry:
 ; SI:  v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
 ; SI:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; VI-DAG:  v_mov_b32_e32 v[[CONST3:[0-9]+]], 0x4200
-; VI-DAG:  v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[CONST3]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG:  v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG:  v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
 ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index c256159726bf..f4afaca2b7a7 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -73,7 +73,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
 ; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
 ; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
 ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 ; CIVI: flat_store_dword
 
@@ -92,9 +92,9 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x
 ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 ; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]]
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
-; VI: v_or_b32_sdwa v{{[0-9]+}}, [[VMASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
 
 ; GFX9: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000
@@ -116,7 +116,7 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h
 ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
 
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0
-; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 
 ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 neg_lo:[1,0] neg_hi:[1,0]
diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll
index 16e4fc680bea..59745a9352ce 100644
--- a/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -117,7 +117,7 @@ define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i
 ; CI: v_cvt_f16_f32
 
 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
-; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
+; VI: v_mul_f16_sdwa v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
 
 ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}}
diff --git a/test/CodeGen/AMDGPU/fptosi.f16.ll b/test/CodeGen/AMDGPU/fptosi.f16.ll
index 50e56e08416a..f310618d8bdb 100644
--- a/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -66,7 +66,7 @@ entry:
 ; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]]
 ; VI: v_cvt_i32_f32_sdwa v[[R_I16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; GCN: buffer_store_dword v[[R_V2_I16]]
 ; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/fptoui.f16.ll b/test/CodeGen/AMDGPU/fptoui.f16.ll
index 2afa6111cf17..7641c08e33c3 100644
--- a/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -66,7 +66,7 @@ entry:
 ; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; VI:      v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]]
 ; VI:      v_cvt_i32_f32_sdwa v[[R_I16_0:[0-9]+]], v[[A_F32_0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI:     v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI:     v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; GCN:     buffer_store_dword v[[R_V2_I16]]
 ; GCN:     s_endpgm
diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll
index 836b480b6a67..fa00c06546db 100644
--- a/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -78,7 +78,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_subrev_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1]
@@ -146,7 +146,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
-; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONSTM1]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]]
 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
index af63a4f8df76..81d9ed2eba8c 100644
--- a/test/CodeGen/AMDGPU/hsa-note-no-func.ll
+++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
@@ -1,6 +1,12 @@
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s
@@ -15,11 +21,16 @@
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s
 
 ; HSA: .hsa_code_object_version 2,1
+; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
+; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU"
 ; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 ; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU"
 ; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU"
+; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU"
 ; HSA-VI800: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
 ; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 ; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU"
@@ -28,3 +39,5 @@
 ; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
 ; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
 ; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
+; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
+; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU"
diff --git a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
index 6e411ce5e017..0c5b8fbda222 100644
--- a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll
@@ -5,40 +5,40 @@
 ; GCN: ; illegal copy v1 to s9
 
 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 {
-  %vgpr = call i32 asm sideeffect "; def $0", "=${VGPR1}"()
-  call void asm sideeffect "; use $0", "${SGPR9}"(i32 %vgpr)
+  %vgpr = call i32 asm sideeffect "; def $0", "=${v1}"()
+  call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr)
   ret void
 }
 
 ; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy
 ; GCN: ; illegal copy v[0:1] to s[10:11]
 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 {
-  %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1}"()
-  call void asm sideeffect "; use $0", "${SGPR10_SGPR11}"(<2 x i32> %vgpr)
+  %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"()
+  call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr)
   ret void
 }
 
 ; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal SGPR to VGPR copy
 ; GCN: ; illegal copy v[0:3] to s[8:11]
 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 {
-  %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3}"()
-  call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11}"(<4 x i32> %vgpr)
+  %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"()
+  call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr)
   ret void
 }
 
 ; ERR: error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal SGPR to VGPR copy
 ; GCN: ; illegal copy v[0:7] to s[8:15]
 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 {
-  %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}"()
-  call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}"(<8 x i32> %vgpr)
+  %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"()
+  call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr)
   ret void
 }
 
 ; ERR error: <unknown>:0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy
 ; GCN: ; illegal copy v[0:15] to s[16:31]
 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 {
-  %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}"()
-  call void asm sideeffect "; use $0", "${SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23_SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}"(<16 x i32> %vgpr)
+  %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"()
+  call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr)
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll
index bc951a82becd..cd3502baee7b 100644
--- a/test/CodeGen/AMDGPU/immv216.ll
+++ b/test/CodeGen/AMDGPU/immv216.ll
@@ -124,7 +124,7 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST0]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -142,7 +142,7 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -160,7 +160,7 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM05]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -178,7 +178,7 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)*
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -196,7 +196,7 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -214,7 +214,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)*
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -232,7 +232,7 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -250,7 +250,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)*
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -268,7 +268,7 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM4]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -285,7 +285,7 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)*
 ; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
 ; VI: buffer_load_dword
 ; VI-NOT: and
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST05]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
 ; VI: v_or_b32
 ; VI: buffer_store_dword
@@ -306,7 +306,7 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace
 ; VI-DAG: buffer_load_dword
 ; VI-NOT: and
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: buffer_store_dword
 define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
@@ -325,7 +325,7 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -343,7 +343,7 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out,
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -361,7 +361,7 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out,
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -379,7 +379,7 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM1]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -397,7 +397,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM2]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -415,7 +415,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONSTM16]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -433,7 +433,7 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)*
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST63]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -451,7 +451,7 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out
 ; VI: buffer_load_ushort [[VAL1:v[0-9]+]]
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL0]]
 ; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
-; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[CONST64]], [[VAL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index fab1f8d12253..0d20c32a4770 100644
--- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -379,7 +379,7 @@ entry:
   %idx0 = load volatile i32, i32 addrspace(1)* %gep
   %idx1 = add i32 %idx0, 1
   %val0 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx0
-  %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" ()
+  %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" ()
   %val1 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx1
   store volatile i32 %val0, i32 addrspace(1)* %out0
   store volatile i32 %val1, i32 addrspace(1)* %out0
diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll
index 36441cf778c2..c0f5218efc16 100644
--- a/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/test/CodeGen/AMDGPU/inline-asm.ll
@@ -193,7 +193,7 @@ entry:
 ; CHECK: use v[0:1]
 define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
 entry:
-  call void asm sideeffect "; use $0 ", "{VGPR0_VGPR1}"(i64 123456)
+  call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
   ret void
 }
 
@@ -202,7 +202,7 @@ entry:
 ; CHECK: ; use v0
 define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
 entry:
-  call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 true)
+  call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
   ret void
 }
 
@@ -215,7 +215,7 @@ entry:
 define amdgpu_kernel void @i1_input_phys_vgpr() {
 entry:
   %val = load i1, i1 addrspace(1)* undef
-  call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 %val)
+  call void asm sideeffect "; use $0 ", "{v0}"(i1 %val)
   ret void
 }
 
@@ -229,7 +229,7 @@ define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
 entry:
   %val0 = load volatile i1, i1 addrspace(1)* undef
   %val1 = load volatile i1, i1 addrspace(1)* undef
-  call void asm sideeffect "; use $0 $1 ", "{VGPR0}, {VGPR1}"(i1 %val0, i1 %val1)
+  call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
   ret void
 }
 
@@ -240,8 +240,8 @@ entry:
 ; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
 define amdgpu_kernel void @muliple_def_phys_vgpr() {
 entry:
-  %def0 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"()
-  %def1 = call i32 asm sideeffect "; def $0 ", "={VGPR0}"()
+  %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
+  %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
   %add = shl i32 %def0, %def1
   store i32 %add, i32 addrspace(1)* undef
   ret void
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 1edccff3bf15..86fc41a23772 100644
--- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -261,7 +261,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace
 ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000
 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
 ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7
 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
@@ -285,7 +285,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out,
 ; CI:   v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
 ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
 ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
 define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
@@ -345,7 +345,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspac
 ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000
 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]]
 ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500
 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
@@ -369,7 +369,7 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out
 ; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
 ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]]
 ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]]
-; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[K]], [[VEC]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]]
 define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
diff --git a/test/CodeGen/AMDGPU/limit-coalesce.mir b/test/CodeGen/AMDGPU/limit-coalesce.mir
index a0d2d6c097a2..7d6d8a5891cd 100644
--- a/test/CodeGen/AMDGPU/limit-coalesce.mir
+++ b/test/CodeGen/AMDGPU/limit-coalesce.mir
@@ -2,13 +2,13 @@
 
 # Check that coalescer does not create wider register tuple than in source
 
-# CHECK:  - { id: 2, class: vreg_64 }
-# CHECK:  - { id: 3, class: vreg_64 }
-# CHECK:  - { id: 4, class: vreg_64 }
-# CHECK:  - { id: 5, class: vreg_96 }
-# CHECK:  - { id: 6, class: vreg_96 }
-# CHECK:  - { id: 7, class: vreg_128 }
-# CHECK:  - { id: 8, class: vreg_128 }
+# CHECK:  - { id: 2, class: vreg_64, preferred-register: '' }
+# CHECK:  - { id: 3, class: vreg_64, preferred-register: '' }
+# CHECK:  - { id: 4, class: vreg_64, preferred-register: '' }
+# CHECK:  - { id: 5, class: vreg_96, preferred-register: '' }
+# CHECK:  - { id: 6, class: vreg_96, preferred-register: '' }
+# CHECK:  - { id: 7, class: vreg_128, preferred-register: '' }
+# CHECK:  - { id: 8, class: vreg_128, preferred-register: '' }
 # No more registers shall be defined
 # CHECK-NEXT: liveins:
 # CHECK:    FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %4,
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll
new file mode 100644
index 000000000000..873a3f0f368f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.alignbit(i32, i32, i32) #0
+declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_alignbit_b32:
+; GCN: v_alignbit_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
+define amdgpu_kernel void @v_alignbit_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
+  %val = call i32 @llvm.amdgcn.alignbit(i32 %src1, i32 %src2, i32 %src3) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_alignbyte_b32:
+; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
+define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
+  %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index 3a2b87cd87f3..83bc8b234724 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -4,18 +4,28 @@
 declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v5, v1
+; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
-  %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0
-  store i64 %result, i64 addrspace(1)* %out, align 4
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
+  %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
+  %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
+  store i64 %tmp2, i64 addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate:
-; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7]
+; GCN-DAG: v_mov_b32_e32 v3, v1
+; GCN-DAG: v_mov_b32_e32 v2, v0
 define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
-  %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0
-  store i64 %result, i64 addrspace(1)* %out, align 4
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+  %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+  %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
+  %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
+  %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
+  store i64 %tmp4, i64 addrspace(1)* %out, align 4
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
index a8d03bf6bbac..685b5e0f29c4 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
@@ -3,45 +3,56 @@
 
 declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0
 
-; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) {
-  %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> <i32 100, i32 100, i32 100, i32 100>) #0
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
+define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+  %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+  %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0
+  %tmp3 = call <4 x i32>  asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
 define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) {
-  %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
-  ret void
-}
-
-; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
-  %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 10, i32 20, i32 30, i32 40>) #0
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+  %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+  %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0
+  %tmp3 = call <4 x i32>  asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
 define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
-  %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+  %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+  %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> <i32 1065353216, i32 0, i32 0, i32 0>) #0
+  %tmp3 = call <4 x i32>  asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr:
-; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
 define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) {
   %in = load <4 x i32>, <4 x i32> addrspace(1) * %input
-
-  %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+  %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+  %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0
+  %tmp3 = call <4 x i32>  asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0
+  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index be71225c5e06..1f46613a8db0 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -4,18 +4,28 @@
 declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN-DAG: v_mov_b32_e32 v5, v1
+; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
-  %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0
-  store i64 %result, i64 addrspace(1)* %out, align 4
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
+  %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
+  %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
+  store i64 %tmp2, i64 addrspace(1)* %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate:
-; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7]
+; GCN-DAG: v_mov_b32_e32 v3, v1
+; GCN-DAG: v_mov_b32_e32 v2, v0
 define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
-  %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0
-  store i64 %result, i64 addrspace(1)* %out, align 4
+  %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
+  %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
+  %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
+  %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
+  %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
+  store i64 %tmp4, i64 addrspace(1)* %out, align 4
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index eec187390169..806723e5136c 100644
--- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -118,7 +118,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
 ; SI:  v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
 
 ; VI-FLUSH:     v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[B_V2_F16]], v[[C_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]]
 ; VI-FLUSH-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]]
 ; VI-FLUSH-NOT: v_and_b32
diff --git a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index a4353d1136e1..8f4b314ffabb 100644
--- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -82,7 +82,7 @@ entry:
 ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NOT: and
 ; VI:     v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
 
@@ -110,7 +110,7 @@ entry:
 ; SI:  v_max_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]]
 ; SI:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; VI-DAG:  v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
-; VI-DAG:  v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG:  v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG:  v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
 
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
@@ -138,7 +138,7 @@ entry:
 ; SI:  v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
 ; SI:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; VI-DAG:  v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
-; VI-DAG:  v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG:  v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG:  v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
 
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
diff --git a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 4875d26fc860..1a86286f7136 100644
--- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -81,7 +81,7 @@ entry:
 ; SI:     v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]]
 
 ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]]
-; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NOT: and
 ; VI:     v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]]
 
@@ -111,7 +111,7 @@ entry:
 ; SI-DAG:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 
 ; VI-DAG:  v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
-; VI-DAG:  v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST4]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG:  v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG:  v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
 
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
@@ -139,7 +139,7 @@ entry:
 ; SI:  v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]]
 ; SI:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
 ; VI-DAG:  v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200
-; VI-DAG:  v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST3]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG:  v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG:  v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
 
 ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
diff --git a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index 77d793201adc..49f00e9447da 100644
--- a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -608,11 +608,11 @@ ret:
 ; GCN: ;;#ASMSTART
 ; GCN: ; use s[0:1]
 define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
-  call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () #0
-  call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () #0
-  call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19}"() #0
-  call void asm sideeffect "", "~{VGPR20_VGPR21}"() #0
-  call void asm sideeffect "", "~{VGPR22}"() #0
+  call void asm sideeffect "", "~{v[0:7]}" () #0
+  call void asm sideeffect "", "~{v[8:15]}" () #0
+  call void asm sideeffect "", "~{v[16:19]}"() #0
+  call void asm sideeffect "", "~{v[20:21]}"() #0
+  call void asm sideeffect "", "~{v22}"() #0
 
   %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
   %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
new file mode 100644
index 000000000000..5b2da788a405
--- /dev/null
+++ b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -0,0 +1,131 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck --check-prefix=OPT %s
+
+; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly.
+; Strictly speaking, the promote-alloca pass shouldn't have to deal with this case, as it is non-canonical,
+; but the pass should handle it gracefully if it does encounter it.
+; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt
+; should now leave these unchanged.
+
+; OPT-LABEL: @promote_1d_aggr(
+; OPT: store [1 x float] %tmp3, [1 x float]* %f1
+
+%Block = type { [1 x float], i32 }
+%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
+
+@block = external addrspace(1) global %Block
+@pv = external addrspace(1) global %gl_PerVertex
+
+define amdgpu_vs void @promote_1d_aggr() #0 {
+  %i = alloca i32
+  %f1 = alloca [1 x float]
+  %tmp = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1
+  %tmp1 = load i32, i32 addrspace(1)* %tmp
+  store i32 %tmp1, i32* %i
+  %tmp2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0
+  %tmp3 = load [1 x float], [1 x float] addrspace(1)* %tmp2
+  store [1 x float] %tmp3, [1 x float]* %f1
+  %tmp4 = load i32, i32* %i
+  %tmp5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %tmp4
+  %tmp6 = load float, float* %tmp5
+  %tmp7 = alloca <4 x float>
+  %tmp8 = load <4 x float>, <4 x float>* %tmp7
+  %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
+  %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
+  %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
+  %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
+  %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+  store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
+  ret void
+}
+
+
+; OPT-LABEL: @promote_store_aggr(
+; OPT: %tmp6 = load [2 x float], [2 x float]* %f1
+
+%Block2 = type { i32, [2 x float] }
+@block2 = external addrspace(1) global %Block2
+
+define amdgpu_vs void @promote_store_aggr() #0 {
+  %i = alloca i32
+  %f1 = alloca [2 x float]
+  %tmp = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0
+  %tmp1 = load i32, i32 addrspace(1)* %tmp
+  store i32 %tmp1, i32* %i
+  %tmp2 = load i32, i32* %i
+  %tmp3 = sitofp i32 %tmp2 to float
+  %tmp4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0
+  store float %tmp3, float* %tmp4
+  %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1
+  store float 2.000000e+00, float* %tmp5
+  %tmp6 = load [2 x float], [2 x float]* %f1
+  %tmp7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1
+  store [2 x float] %tmp6, [2 x float] addrspace(1)* %tmp7
+  %tmp8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> addrspace(1)* %tmp8
+  ret void
+}
+
+; OPT-LABEL: @promote_load_from_store_aggr(
+; OPT: store [2 x float] %tmp3, [2 x float]* %f1
+
+%Block3 = type { [2 x float], i32 }
+@block3 = external addrspace(1) global %Block3
+
+define amdgpu_vs void @promote_load_from_store_aggr() #0 {
+  %i = alloca i32
+  %f1 = alloca [2 x float]
+  %tmp = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1
+  %tmp1 = load i32, i32 addrspace(1)* %tmp
+  store i32 %tmp1, i32* %i
+  %tmp2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0
+  %tmp3 = load [2 x float], [2 x float] addrspace(1)* %tmp2
+  store [2 x float] %tmp3, [2 x float]* %f1
+  %tmp4 = load i32, i32* %i
+  %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %tmp4
+  %tmp6 = load float, float* %tmp5
+  %tmp7 = alloca <4 x float>
+  %tmp8 = load <4 x float>, <4 x float>* %tmp7
+  %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0
+  %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1
+  %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2
+  %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3
+  %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0
+  store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13
+  ret void
+}
+
+; OPT-LABEL: @promote_double_aggr(
+; OPT: store [2 x double] %tmp5, [2 x double]* %s
+
+@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
+@frag_color = external addrspace(1) global <4 x float>
+
+define amdgpu_ps void @promote_double_aggr() #0 {
+  %s = alloca [2 x double]
+  %tmp = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0
+  %tmp1 = load double, double addrspace(1)* %tmp
+  %tmp2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1
+  %tmp3 = load double, double addrspace(1)* %tmp2
+  %tmp4 = insertvalue [2 x double] undef, double %tmp1, 0
+  %tmp5 = insertvalue [2 x double] %tmp4, double %tmp3, 1
+  store [2 x double] %tmp5, [2 x double]* %s
+  %tmp6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+  %tmp7 = load double, double* %tmp6
+  %tmp8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+  %tmp9 = load double, double* %tmp8
+  %tmp10 = fadd double %tmp7, %tmp9
+  %tmp11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
+  store double %tmp10, double* %tmp11
+  %tmp12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0
+  %tmp13 = load double, double* %tmp12
+  %tmp14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1
+  %tmp15 = load double, double* %tmp14
+  %tmp16 = fadd double %tmp13, %tmp15
+  %tmp17 = fptrunc double %tmp16 to float
+  %tmp18 = insertelement <4 x float> undef, float %tmp17, i32 0
+  %tmp19 = insertelement <4 x float> %tmp18, float %tmp17, i32 1
+  %tmp20 = insertelement <4 x float> %tmp19, float %tmp17, i32 2
+  %tmp21 = insertelement <4 x float> %tmp20, float %tmp17, i32 3
+  store <4 x float> %tmp21, <4 x float> addrspace(1)* @frag_color
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
new file mode 100644
index 000000000000..1a0d68d81f97
--- /dev/null
+++ b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir
@@ -0,0 +1,69 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s
+---
+
+# GCN-LABEL: name: mac_invalid_operands
+# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec
+
+name:            mac_invalid_operands
+alignment:       0
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vreg_128 }
+  - { id: 1, class: vreg_128 }
+  - { id: 2, class: sgpr_64 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: vgpr_32 }
+  - { id: 5, class: vgpr_32 }
+  - { id: 6, class: vgpr_32 }
+  - { id: 7, class: sreg_64 }
+  - { id: 8, class: vgpr_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: vreg_64 }
+  - { id: 11, class: vreg_64 }
+  - { id: 12, class: vreg_128 }
+  - { id: 13, class: vreg_128 }
+  - { id: 14, class: vgpr_32 }
+  - { id: 15, class: vreg_64 }
+  - { id: 16, class: vgpr_32 }
+  - { id: 17, class: vreg_128 }
+body:             |
+  bb.0:
+    successors: %bb.2, %bb.1
+
+    %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec
+    %vcc = COPY killed %7
+    S_CBRANCH_VCCZ %bb.2, implicit killed %vcc
+
+  bb.1:
+    successors: %bb.3
+
+    %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec
+    undef %12.sub0 = COPY killed %4
+    %17 = COPY killed %12
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec
+    undef %13.sub0 = COPY %8
+    %13.sub1 = COPY %8
+    %13.sub2 = COPY killed %8
+    %0 = COPY killed %13
+    %17 = COPY killed %0
+
+  bb.3:
+    %1 = COPY killed %17
+    FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, implicit %exec, implicit %flat_scr
+    %14 = COPY %1.sub1
+    %16 = COPY killed %1.sub0
+    undef %15.sub0 = COPY killed %16
+    %15.sub1 = COPY killed %14
+    FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, implicit %exec, implicit %flat_scr
+    S_ENDPGM
+
+...
diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll
index 6ed730ad60f4..abd15f1fb47f 100644
--- a/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -12,8 +12,10 @@
 ; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
 ; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
 
-; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]]
-; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]]
+; GCN-DAG: v_mov_b32_e32 [[C200:v[0-9]+]], 0x200
+; GCN-DAG: v_mov_b32_e32 [[C400:v[0-9]+]], 0x400
+; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[C200]], [[CLAMP_IDX]]
+; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[C400]], [[CLAMP_IDX]]
 
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole.ll b/test/CodeGen/AMDGPU/sdwa-peephole.ll
index a319edfc5ace..66e166d283f7 100644
--- a/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -74,7 +74,7 @@ entry:
 
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_HI]], v[[DST_MUL_LO]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
 entry:
@@ -97,8 +97,8 @@ entry:
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL2:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL3:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) {
 entry:
@@ -125,10 +125,10 @@ entry:
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL5:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL6:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0
 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL7:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL7]], v[[DST_MUL6]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL5]], v[[DST_MUL4]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL6]], v[[DST_MUL7]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL4]], v[[DST_MUL5]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) {
 entry:
@@ -347,8 +347,8 @@ entry:
 ; NOSDWA-NOT: v_mul_u32_u24_sdwa
 ; SDWA-DAG: v_mov_b32_e32 v[[M321:[0-9]+]], 0x141
 ; SDWA-DAG: v_mov_b32_e32 v[[M123:[0-9]+]], 0x7b
-; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M123]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v[[M321]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M123]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M321]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 
 define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
@@ -367,7 +367,7 @@ entry:
 ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; NOSDWA-NOT: v_mul_u32_u24_sdwa
 
-; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 
 define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) {
 entry:
@@ -408,9 +408,9 @@ store_label:
 ; NOSDWA-NOT: v_and_b32_sdwa
 ; NOSDWA-NOT: v_or_b32_sdwa
 
-; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
 ; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 
diff --git a/test/CodeGen/AMDGPU/shl.v2i16.ll b/test/CodeGen/AMDGPU/shl.v2i16.ll
index 115221c5316d..839854fd575b 100644
--- a/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -10,7 +10,7 @@
 
 ; VI: v_lshlrev_b32_e32
 ; VI: v_lshlrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; CI: v_lshlrev_b32_e32
 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
index 114c97b61bd4..a57e7b13453f 100644
--- a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
@@ -25,50 +25,50 @@
 ; SMEM: s_dcache_wb
 ; ALL: s_endpgm
 define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
-  call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
-  call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
-  call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
-  call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
-  call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
-  call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
-  call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
-  call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
-  call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
-  call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
-  call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
-  call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
-  call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" ()
-  call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" ()
-  call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19_VGPR20_VGPR21_VGPR22_VGPR23}" ()
-  call void asm sideeffect "", "~{VGPR24_VGPR25_VGPR26_VGPR27_VGPR28_VGPR29_VGPR30_VGPR31}" ()
-  call void asm sideeffect "", "~{VGPR32_VGPR33_VGPR34_VGPR35_VGPR36_VGPR37_VGPR38_VGPR39}" ()
-  call void asm sideeffect "", "~{VGPR40_VGPR41_VGPR42_VGPR43_VGPR44_VGPR45_VGPR46_VGPR47}" ()
-  call void asm sideeffect "", "~{VGPR48_VGPR49_VGPR50_VGPR51_VGPR52_VGPR53_VGPR54_VGPR55}" ()
-  call void asm sideeffect "", "~{VGPR56_VGPR57_VGPR58_VGPR59_VGPR60_VGPR61_VGPR62_VGPR63}" ()
-  call void asm sideeffect "", "~{VGPR64_VGPR65_VGPR66_VGPR67_VGPR68_VGPR69_VGPR70_VGPR71}" ()
-  call void asm sideeffect "", "~{VGPR72_VGPR73_VGPR74_VGPR75_VGPR76_VGPR77_VGPR78_VGPR79}" ()
-  call void asm sideeffect "", "~{VGPR80_VGPR81_VGPR82_VGPR83_VGPR84_VGPR85_VGPR86_VGPR87}" ()
-  call void asm sideeffect "", "~{VGPR88_VGPR89_VGPR90_VGPR91_VGPR92_VGPR93_VGPR94_VGPR95}" ()
-  call void asm sideeffect "", "~{VGPR96_VGPR97_VGPR98_VGPR99_VGPR100_VGPR101_VGPR102_VGPR103}" ()
-  call void asm sideeffect "", "~{VGPR104_VGPR105_VGPR106_VGPR107_VGPR108_VGPR109_VGPR110_VGPR111}" ()
-  call void asm sideeffect "", "~{VGPR112_VGPR113_VGPR114_VGPR115_VGPR116_VGPR117_VGPR118_VGPR119}" ()
-  call void asm sideeffect "", "~{VGPR120_VGPR121_VGPR122_VGPR123_VGPR124_VGPR125_VGPR126_VGPR127}" ()
-  call void asm sideeffect "", "~{VGPR128_VGPR129_VGPR130_VGPR131_VGPR132_VGPR133_VGPR134_VGPR135}" ()
-  call void asm sideeffect "", "~{VGPR136_VGPR137_VGPR138_VGPR139_VGPR140_VGPR141_VGPR142_VGPR143}" ()
-  call void asm sideeffect "", "~{VGPR144_VGPR145_VGPR146_VGPR147_VGPR148_VGPR149_VGPR150_VGPR151}" ()
-  call void asm sideeffect "", "~{VGPR152_VGPR153_VGPR154_VGPR155_VGPR156_VGPR157_VGPR158_VGPR159}" ()
-  call void asm sideeffect "", "~{VGPR160_VGPR161_VGPR162_VGPR163_VGPR164_VGPR165_VGPR166_VGPR167}" ()
-  call void asm sideeffect "", "~{VGPR168_VGPR169_VGPR170_VGPR171_VGPR172_VGPR173_VGPR174_VGPR175}" ()
-  call void asm sideeffect "", "~{VGPR176_VGPR177_VGPR178_VGPR179_VGPR180_VGPR181_VGPR182_VGPR183}" ()
-  call void asm sideeffect "", "~{VGPR184_VGPR185_VGPR186_VGPR187_VGPR188_VGPR189_VGPR190_VGPR191}" ()
-  call void asm sideeffect "", "~{VGPR192_VGPR193_VGPR194_VGPR195_VGPR196_VGPR197_VGPR198_VGPR199}" ()
-  call void asm sideeffect "", "~{VGPR200_VGPR201_VGPR202_VGPR203_VGPR204_VGPR205_VGPR206_VGPR207}" ()
-  call void asm sideeffect "", "~{VGPR208_VGPR209_VGPR210_VGPR211_VGPR212_VGPR213_VGPR214_VGPR215}" ()
-  call void asm sideeffect "", "~{VGPR216_VGPR217_VGPR218_VGPR219_VGPR220_VGPR221_VGPR222_VGPR223}" ()
-  call void asm sideeffect "", "~{VGPR224_VGPR225_VGPR226_VGPR227_VGPR228_VGPR229_VGPR230_VGPR231}" ()
-  call void asm sideeffect "", "~{VGPR232_VGPR233_VGPR234_VGPR235_VGPR236_VGPR237_VGPR238_VGPR239}" ()
-  call void asm sideeffect "", "~{VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247}" ()
-  call void asm sideeffect "", "~{VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255}" ()
+  call void asm sideeffect "", "~{s[0:7]}" ()
+  call void asm sideeffect "", "~{s[8:15]}" ()
+  call void asm sideeffect "", "~{s[16:23]}" ()
+  call void asm sideeffect "", "~{s[24:31]}" ()
+  call void asm sideeffect "", "~{s[32:39]}" ()
+  call void asm sideeffect "", "~{s[40:47]}" ()
+  call void asm sideeffect "", "~{s[48:55]}" ()
+  call void asm sideeffect "", "~{s[56:63]}" ()
+  call void asm sideeffect "", "~{s[64:71]}" ()
+  call void asm sideeffect "", "~{s[72:79]}" ()
+  call void asm sideeffect "", "~{s[80:87]}" ()
+  call void asm sideeffect "", "~{s[88:95]}" ()
+  call void asm sideeffect "", "~{v[0:7]}" ()
+  call void asm sideeffect "", "~{v[8:15]}" ()
+  call void asm sideeffect "", "~{v[16:23]}" ()
+  call void asm sideeffect "", "~{v[24:31]}" ()
+  call void asm sideeffect "", "~{v[32:39]}" ()
+  call void asm sideeffect "", "~{v[40:47]}" ()
+  call void asm sideeffect "", "~{v[48:55]}" ()
+  call void asm sideeffect "", "~{v[56:63]}" ()
+  call void asm sideeffect "", "~{v[64:71]}" ()
+  call void asm sideeffect "", "~{v[72:79]}" ()
+  call void asm sideeffect "", "~{v[80:87]}" ()
+  call void asm sideeffect "", "~{v[88:95]}" ()
+  call void asm sideeffect "", "~{v[96:103]}" ()
+  call void asm sideeffect "", "~{v[104:111]}" ()
+  call void asm sideeffect "", "~{v[112:119]}" ()
+  call void asm sideeffect "", "~{v[120:127]}" ()
+  call void asm sideeffect "", "~{v[128:135]}" ()
+  call void asm sideeffect "", "~{v[136:143]}" ()
+  call void asm sideeffect "", "~{v[144:151]}" ()
+  call void asm sideeffect "", "~{v[152:159]}" ()
+  call void asm sideeffect "", "~{v[160:167]}" ()
+  call void asm sideeffect "", "~{v[168:175]}" ()
+  call void asm sideeffect "", "~{v[176:183]}" ()
+  call void asm sideeffect "", "~{v[184:191]}" ()
+  call void asm sideeffect "", "~{v[192:199]}" ()
+  call void asm sideeffect "", "~{v[200:207]}" ()
+  call void asm sideeffect "", "~{v[208:215]}" ()
+  call void asm sideeffect "", "~{v[216:223]}" ()
+  call void asm sideeffect "", "~{v[224:231]}" ()
+  call void asm sideeffect "", "~{v[232:239]}" ()
+  call void asm sideeffect "", "~{v[240:247]}" ()
+  call void asm sideeffect "", "~{v[248:255]}" ()
 
   store i32 %in, i32 addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll
index 3f53572ab440..ea8b87f1dee2 100644
--- a/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -79,7 +79,7 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
   call void @llvm.AMDGPU.kill(float %x)
-  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
+  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
   call void @llvm.AMDGPU.kill(float %y)
   ret void
 }
@@ -128,7 +128,7 @@ bb:
     v_nop_e64
     v_nop_e64
     v_nop_e64
-    v_nop_e64", "={VGPR7}"()
+    v_nop_e64", "={v7}"()
   call void @llvm.AMDGPU.kill(float %var)
   br label %exit
 
@@ -186,11 +186,11 @@ bb:
     v_nop_e64
     v_nop_e64
     v_nop_e64
-    v_nop_e64", "={VGPR7}"()
-  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
+    v_nop_e64", "={v7}"()
+  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
   call void @llvm.AMDGPU.kill(float %var)
   store volatile float %live.across, float addrspace(1)* undef
-  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
+  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
   br label %exit
 
 exit:
@@ -242,7 +242,7 @@ bb:
     v_nop_e64
     v_nop_e64
     v_nop_e64
-    v_nop_e64", "={VGPR7}"()
+    v_nop_e64", "={v7}"()
   call void @llvm.AMDGPU.kill(float %var)
   %vgpr = load volatile i32, i32 addrspace(1)* undef
   %loop.cond = icmp eq i32 %vgpr, 0
diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 5d71ad2c8ba3..a9aac2d8abb7 100644
--- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -10,11 +10,11 @@
 
 ; VI: v_sub_i32_e32
 ; VI-DAG: v_sub_i32_e32
-; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 ; VI: v_add_i32_e32
 ; VI: v_add_i32_e32
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 
 ; CI: v_sub_i32_e32
 ; CI-DAG: v_sub_i32_e32
@@ -47,7 +47,7 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}}
-; VI: v_add_u16_sdwa v{{[0-9]+}}, [[TWO]], v{{[0-9]+}}  dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[TWO]]  dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NOT: v_and_b32
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
diff --git a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index c05021a91ff0..a23461a0a514 100644
--- a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -20,13 +20,13 @@ entry:
   %a = load <1280 x i32>, <1280 x i32> addrspace(1)* %aptr
 
 ; mark most VGPR registers as used to increase register pressure
-  call void asm sideeffect "", "~{VGPR4},~{VGPR8},~{VGPR12},~{VGPR16},~{VGPR20},~{VGPR24},~{VGPR28},~{VGPR32}" ()
-  call void asm sideeffect "", "~{VGPR36},~{VGPR40},~{VGPR44},~{VGPR48},~{VGPR52},~{VGPR56},~{VGPR60},~{VGPR64}" ()
-  call void asm sideeffect "", "~{VGPR68},~{VGPR72},~{VGPR76},~{VGPR80},~{VGPR84},~{VGPR88},~{VGPR92},~{VGPR96}" ()
-  call void asm sideeffect "", "~{VGPR100},~{VGPR104},~{VGPR108},~{VGPR112},~{VGPR116},~{VGPR120},~{VGPR124},~{VGPR128}" ()
-  call void asm sideeffect "", "~{VGPR132},~{VGPR136},~{VGPR140},~{VGPR144},~{VGPR148},~{VGPR152},~{VGPR156},~{VGPR160}" ()
-  call void asm sideeffect "", "~{VGPR164},~{VGPR168},~{VGPR172},~{VGPR176},~{VGPR180},~{VGPR184},~{VGPR188},~{VGPR192}" ()
-  call void asm sideeffect "", "~{VGPR196},~{VGPR200},~{VGPR204},~{VGPR208},~{VGPR212},~{VGPR216},~{VGPR220},~{VGPR224}" ()
+  call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" ()
+  call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" ()
+  call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" ()
+  call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" ()
+  call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" ()
+  call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" ()
+  call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" ()
 
   %outptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %out, i32 %tid
   store <1280 x i32> %a, <1280 x i32> addrspace(1)* %outptr
diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll
index 6aeff3fc3b6c..ee923e2b8b61 100644
--- a/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -5,7 +5,7 @@
 ; GCN-LABEL: {{^}}v_test_sub_v2i16:
 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
-; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI: v_subrev_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -62,7 +62,7 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out
 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
 
 ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfffffe38
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xffffff85, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -80,7 +80,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou
 ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
 
 ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3df
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}}
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x34d, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -98,7 +98,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)*
 ; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]]
 ; VI: flat_load_ushort [[LOAD1:v[0-9]+]]
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[ONE]], [[LOAD0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], [[ONE]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD1]]
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -137,7 +137,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
 
 ; VI-NOT: v_subrev_i16
 ; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080
-; VI: v_add_u16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NOT: v_subrev_i16
 ; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
@@ -252,7 +252,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)
 ; GFX9: v_pk_sub_i16
 ; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
 
-; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI: v_subrev_u16_e32
 
 ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
diff --git a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 3e80fcf85b52..1e08f51dabde 100644
--- a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -73,14 +73,14 @@ bb11:                                             ; preds = %bb9
 
 ; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}}
 define amdgpu_kernel void @partially_undef_copy() #0 {
-  %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={VGPR5}"()
-  %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={VGPR6}"()
+  %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={v5}"()
+  %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={v6}"()
 
   %partially.undef.0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
   %partially.undef.1 = insertelement <4 x i32> %partially.undef.0, i32 %tmp1, i32 0
 
   store volatile <4 x i32> %partially.undef.1, <4 x i32> addrspace(1)* undef, align 16
-  tail call void asm sideeffect "v_nop", "v={VGPR5_VGPR6_VGPR7_VGPR8}"(<4 x i32> %partially.undef.0)
+  tail call void asm sideeffect "v_nop", "v={v[5:8]}"(<4 x i32> %partially.undef.0)
   ret void
 }
 
diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll
index 3da1a0324042..ce4a69db3506 100644
--- a/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -304,14 +304,14 @@ entry:
 ; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]]
 
 ; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
-; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
-; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
-; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
-; SI-DAG:  v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
-; SI-DAG:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
+; SI:  v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
+; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
+; SI:  v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
 ; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
+; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
+; SI:  v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
 ; SI:  v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
+; SI-DAG:  v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
 ; SI-DAG:  v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
 ; SI-DAG:  v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]]
 ; SI-DAG:  v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
@@ -320,12 +320,12 @@ entry:
 ; VI-NOT: and
 ; SI:  v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
 
-; VI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
-; VI-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]]
-; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]]
+; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
+; VI-DAG: v_mac_f16_sdwa v[[C_F16_1]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
+; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
 ; VI-NOT: and
-; VI:  v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[A_V2_F16]]
+; VI:  v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[C_V2_F16]]
 
 ; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]]
 ; GCN: s_endpgm
@@ -336,7 +336,9 @@ define amdgpu_kernel void @mac_v2f16(
     <2 x half> addrspace(1)* %c) #0 {
 entry:
   %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
+  call void @llvm.amdgcn.s.barrier() #2
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
+  call void @llvm.amdgcn.s.barrier() #2
   %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
 
   %t.val = fmul <2 x half> %a.val, %b.val
@@ -485,7 +487,7 @@ entry:
 ; VI-DAG:  v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; VI-DAG:  v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
 ; VI-DAG:  v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG:  v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-DAG:  v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; VI-DAG:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
 
 ; GCN: s_endpgm
@@ -517,7 +519,7 @@ entry:
 ; VI:  v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; VI:  v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
 ; VI:  v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-DAG:  v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-DAG:  v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 ; VI-DAG:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
 
 ; GCN: s_endpgm
@@ -670,5 +672,8 @@ entry:
   ret void
 }
 
+declare void @llvm.amdgcn.s.barrier() #2
+
 attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
 attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
new file mode 100644
index 000000000000..d7f208d4cf59
--- /dev/null
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -0,0 +1,149 @@
+# RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  define void @test_mla() #0 { ret void }
+  define void @test_mla_v5() #1 { ret void }
+
+  define void @test_mls() #2 { ret void }
+  define void @test_no_mls() { ret void }
+
+  attributes #0 = { "target-features"="+v6" }
+  attributes #1 = { "target-features"="-v6" }
+  attributes #2 = { "target-features"="+v6t2" }
+...
+---
+name:            test_mla
+# CHECK-LABEL: name: test_mla
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+  - { id: 3, class: gprb }
+  - { id: 4, class: gprb }
+body:             |
+  bb.0:
+    liveins: %r0, %r1, %r2
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+    %3(s32) = G_MUL %0, %1
+    %4(s32) = G_ADD %3, %2
+    ; CHECK: [[VREGR:%[0-9]+]] = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _
+
+    %r0 = COPY %4(s32)
+    ; CHECK: %r0 = COPY [[VREGR]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_mla_v5
+# CHECK-LABEL: name: test_mla_v5
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+  - { id: 3, class: gprb }
+  - { id: 4, class: gprb }
+body:             |
+  bb.0:
+    liveins: %r0, %r1, %r2
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+    %3(s32) = G_MUL %0, %1
+    %4(s32) = G_ADD %3, %2
+    ; CHECK: [[VREGR:%[0-9]+]] = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _
+
+    %r0 = COPY %4(s32)
+    ; CHECK: %r0 = COPY [[VREGR]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_mls
+# CHECK-LABEL: name: test_mls
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+  - { id: 3, class: gprb }
+  - { id: 4, class: gprb }
+body:             |
+  bb.0:
+    liveins: %r0, %r1, %r2
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+    %3(s32) = G_MUL %0, %1
+    %4(s32) = G_SUB %2, %3
+    ; CHECK: [[VREGR:%[0-9]+]] = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _
+
+    %r0 = COPY %4(s32)
+    ; CHECK: %r0 = COPY [[VREGR]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_no_mls
+# CHECK-LABEL: name: test_no_mls
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+  - { id: 3, class: gprb }
+  - { id: 4, class: gprb }
+body:             |
+  bb.0:
+    liveins: %r0, %r1, %r2
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+
+    %3(s32) = G_MUL %0, %1
+    %4(s32) = G_SUB %2, %3
+    ; CHECK: [[VREGM:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _
+    ; CHECK: [[VREGR:%[0-9]+]] = SUBrr [[VREGZ]], [[VREGM]], 14, _, _
+
+    %r0 = COPY %4(s32)
+    ; CHECK: %r0 = COPY [[VREGR]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
index 72c3b715d36e..16642d85d9cf 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -28,6 +28,10 @@
   define void @test_sdiv_s32() #2 { ret void }
   define void @test_udiv_s32() #2 { ret void }
 
+  define void @test_and_s32() { ret void }
+  define void @test_or_s32() { ret void }
+  define void @test_xor_s32() { ret void }
+
   define void @test_load_from_stack() { ret void }
   define void @test_load_f32() #0 { ret void }
   define void @test_load_f64() #0 { ret void }
@@ -783,6 +787,105 @@ body:             |
     ; CHECK: BX_RET 14, _, implicit %r0
 ...
 ---
+name:            test_and_s32
+# CHECK-LABEL: name: test_and_s32
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s32) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s32) = G_AND %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = ANDrr [[VREGX]], [[VREGY]], 14, _
+
+    %r0 = COPY %2(s32)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_or_s32
+# CHECK-LABEL: name: test_or_s32
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s32) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s32) = G_OR %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = ORRrr [[VREGX]], [[VREGY]], 14, _
+
+    %r0 = COPY %2(s32)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
+name:            test_xor_s32
+# CHECK-LABEL: name: test_xor_s32
+legalized:       true
+regBankSelected: true
+selected:        false
+# CHECK: selected: true
+registers:
+  - { id: 0, class: gprb }
+  - { id: 1, class: gprb }
+  - { id: 2, class: gprb }
+# CHECK: id: 0, class: gpr
+# CHECK: id: 1, class: gpr
+# CHECK: id: 2, class: gpr
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+
+    %1(s32) = COPY %r1
+    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
+
+    %2(s32) = G_XOR %0, %1
+    ; CHECK: [[VREGRES:%[0-9]+]] = EORrr [[VREGX]], [[VREGY]], 14, _
+
+    %r0 = COPY %2(s32)
+    ; CHECK: %r0 = COPY [[VREGRES]]
+
+    BX_RET 14, _, implicit %r0
+    ; CHECK: BX_RET 14, _, implicit %r0
+...
+---
 name:            test_load_from_stack
 # CHECK-LABEL: name: test_load_from_stack
 legalized:       true
@@ -802,8 +905,8 @@ fixedStack:
   - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false }
   - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
   - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0
-# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8
+# CHECK-DAG: id: [[FI1:[0-9]+]], type: default, offset: 0, size: 1
+# CHECK-DAG: id: [[FI32:[0-9]+]], type: default, offset: 8
 body:             |
   bb.0:
     liveins: %r0, %r1, %r2, %r3
@@ -1024,13 +1127,11 @@ body:             |
     %1(s32) = COPY %r3
     ; CHECK: [[IN2:%[0-9]+]] = COPY %r3
 
-    %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 1
+    %2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
     ; CHECK: %[[DREG]] = VMOVDRR [[IN1]], [[IN2]]
 
-    %3(s32) = G_EXTRACT %2(s64), 0
-    %4(s32) = G_EXTRACT %2(s64), 32
-    ; CHECK: [[OUT1:%[0-9]+]] = VGETLNi32 %[[DREG]], 0
-    ; CHECK: [[OUT2:%[0-9]+]] = VGETLNi32 %[[DREG]], 1
+    %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
+    ; CHECK: [[OUT1:%[0-9]+]], [[OUT2:%[0-9]+]] = VMOVRRD %[[DREG]]
 
     %r0 = COPY %3
     ; CHECK: %r0 = COPY [[OUT1]]
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index 1c7769894a27..05902c22fb98 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
-; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
+; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
+; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
 
 define void @test_void_return() {
 ; CHECK-LABEL: name: test_void_return
@@ -329,15 +329,13 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p
 ; CHECK: liveins: %r0, %r1, %r2, %r3
 ; CHECK-DAG: [[VREGP1LO:%[0-9]+]](s32) = COPY %r2
 ; CHECK-DAG: [[VREGP1HI:%[0-9]+]](s32) = COPY %r3
-; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1LO]](s32), 0, [[VREGP1HI]](s32), 32
-; BIG: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1HI]](s32), 0, [[VREGP1LO]](s32), 32
+; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1LO]](s32), [[VREGP1HI]](s32)
+; BIG: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1HI]](s32), [[VREGP1LO]](s32)
 ; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]]
 ; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]]
-; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
-; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
+; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
 ; CHECK-DAG: %r0 = COPY [[VREGVLO]]
 ; CHECK-DAG: %r1 = COPY [[VREGVHI]]
 ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
@@ -376,15 +374,13 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0,
 ; CHECK: liveins: %r0, %r2, %r3
 ; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r2
 ; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r3
-; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
-; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
+; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32)
+; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32)
 ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
 ; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
-; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
-; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
+; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
 ; CHECK-DAG: %r0 = COPY [[VREGVLO]]
 ; CHECK-DAG: %r1 = COPY [[VREGVHI]]
 ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
@@ -401,15 +397,13 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler,
 ; CHECK: liveins: %r0, %r1, %r2
 ; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r0
 ; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r1
-; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32
-; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32
+; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32)
+; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32)
 ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]]
 ; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8
 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]]
-; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
-; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0
-; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32
+; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
+; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64)
 ; CHECK-DAG: %r0 = COPY [[VREGVLO]]
 ; CHECK-DAG: %r1 = COPY [[VREGVHI]]
 ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
@@ -420,9 +414,11 @@ entry:
 
 define arm_aapcscc void @test_indirect_call(void() *%fptr) {
 ; CHECK-LABEL: name: test_indirect_call
-; CHECK: [[FPTR:%[0-9]+]](p0) = COPY %r0
+; CHECK: registers:
+; CHECK-NEXT: id: [[FPTR:[0-9]+]], class: gpr
+; CHECK: %[[FPTR]](p0) = COPY %r0
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: BLX [[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
+; CHECK: BLX %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
 ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
 entry:
   notail call arm_aapcscc void %fptr()
@@ -566,13 +562,12 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) {
 ; CHECK-LABEL: name: test_call_aapcs_fp_params
 ; CHECK-DAG: [[A1:%[0-9]+]](s32) = COPY %r0
 ; CHECK-DAG: [[A2:%[0-9]+]](s32) = COPY %r1
-; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A1]](s32), 0, [[A2]](s32), 32
-; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A2]](s32), 0, [[A1]](s32), 32
+; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A1]](s32), [[A2]](s32)
+; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32)
 ; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r2
 ; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp
 ; CHECK-DAG: %r0 = COPY [[BVREG]]
-; CHECK-DAG: [[A1:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 0
-; CHECK-DAG: [[A2:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 32
+; CHECK-DAG: [[A1:%[0-9]+]](s32), [[A2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[AVREG]](s64)
 ; LITTLE-DAG: %r2 = COPY [[A1]]
 ; LITTLE-DAG: %r3 = COPY [[A2]]
 ; BIG-DAG: %r2 = COPY [[A2]]
@@ -588,11 +583,10 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) {
 ; CHECK: BLX @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
 ; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r0
 ; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r1
-; LITTLE: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R1]](s32), 0, [[R2]](s32), 32
-; BIG: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R2]](s32), 0, [[R1]](s32), 32
+; LITTLE: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R1]](s32), [[R2]](s32)
+; BIG: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R2]](s32), [[R1]](s32)
 ; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp
-; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 0
-; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 32
+; CHECK: [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RVREG]](s64)
 ; LITTLE-DAG: %r0 = COPY [[R1]]
 ; LITTLE-DAG: %r1 = COPY [[R2]]
 ; BIG-DAG: %r0 = COPY [[R2]]
@@ -702,8 +696,8 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) {
 ; CHECK: fixedStack:
 ; The parameters live in separate stack locations, one for each element that
 ; doesn't fit in the registers.
-; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], offset: 0, size: 4
-; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], offset: 60, size: 4
+; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4,
+; CHECK-DAG: id: [[LAST_STACK_ID:[0-9]+]], type: default, offset: 60, size: 4
 ; CHECK: liveins: %r0, %r1, %r2, %r3
 ; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0
 ; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1
@@ -755,16 +749,16 @@ declare arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double])
 define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) {
 ; CHECK-LABEL: name: test_fp_arrays_aapcs
 ; CHECK: fixedStack:
-; CHECK: id: [[ARR2_ID:[0-9]+]], offset: 0, size: 8
+; CHECK: id: [[ARR2_ID:[0-9]+]], type: default, offset: 0, size: 8,
 ; CHECK: liveins: %r0, %r1, %r2, %r3
 ; CHECK: [[ARR0_0:%[0-9]+]](s32) = COPY %r0
 ; CHECK: [[ARR0_1:%[0-9]+]](s32) = COPY %r1
-; LITTLE: [[ARR0:%[0-9]+]](s64) = G_SEQUENCE [[ARR0_0]](s32), 0, [[ARR0_1]](s32), 32
-; BIG: [[ARR0:%[0-9]+]](s64) = G_SEQUENCE [[ARR0_1]](s32), 0, [[ARR0_0]](s32), 32
+; LITTLE: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_0]](s32), [[ARR0_1]](s32)
+; BIG: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_1]](s32), [[ARR0_0]](s32)
 ; CHECK: [[ARR1_0:%[0-9]+]](s32) = COPY %r2
 ; CHECK: [[ARR1_1:%[0-9]+]](s32) = COPY %r3
-; LITTLE: [[ARR1:%[0-9]+]](s64) = G_SEQUENCE [[ARR1_0]](s32), 0, [[ARR1_1]](s32), 32
-; BIG: [[ARR1:%[0-9]+]](s64) = G_SEQUENCE [[ARR1_1]](s32), 0, [[ARR1_0]](s32), 32
+; LITTLE: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_0]](s32), [[ARR1_1]](s32)
+; BIG: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32)
 ; CHECK: [[ARR2_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]]
 ; CHECK: [[ARR2:%[0-9]+]](s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]]
 ; CHECK: [[ARR_MERGED_0:%[0-9]+]](s192) = IMPLICIT_DEF
@@ -776,14 +770,12 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) {
 ; CHECK: [[ARR0:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 0
 ; CHECK: [[ARR1:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 64
 ; CHECK: [[ARR2:%[0-9]+]](s64) = G_EXTRACT [[ARR_MERGED]](s192), 128
-; CHECK: [[ARR0_0:%[0-9]+]](s32) = G_EXTRACT [[ARR0]](s64), 0
-; CHECK: [[ARR0_1:%[0-9]+]](s32) = G_EXTRACT [[ARR0]](s64), 32
+; CHECK: [[ARR0_0:%[0-9]+]](s32), [[ARR0_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR0]](s64)
 ; LITTLE: %r0 = COPY [[ARR0_0]](s32)
 ; LITTLE: %r1 = COPY [[ARR0_1]](s32)
 ; BIG: %r0 = COPY [[ARR0_1]](s32)
 ; BIG: %r1 = COPY [[ARR0_0]](s32)
-; CHECK: [[ARR1_0:%[0-9]+]](s32) = G_EXTRACT [[ARR1]](s64), 0
-; CHECK: [[ARR1_1:%[0-9]+]](s32) = G_EXTRACT [[ARR1]](s64), 32
+; CHECK: [[ARR1_0:%[0-9]+]](s32), [[ARR1_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR1]](s64)
 ; LITTLE: %r2 = COPY [[ARR1_0]](s32)
 ; LITTLE: %r3 = COPY [[ARR1_1]](s32)
 ; BIG: %r2 = COPY [[ARR1_1]](s32)
@@ -815,10 +807,10 @@ declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3
 define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) {
 ; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp
 ; CHECK: fixedStack:
-; CHECK-DAG: id: [[Z0_ID:[0-9]+]], offset: 0, size: 8
-; CHECK-DAG: id: [[Z1_ID:[0-9]+]], offset: 8, size: 8
-; CHECK-DAG: id: [[Z2_ID:[0-9]+]], offset: 16, size: 8
-; CHECK-DAG: id: [[Z3_ID:[0-9]+]], offset: 24, size: 8
+; CHECK-DAG: id: [[Z0_ID:[0-9]+]], type: default, offset: 0, size: 8,
+; CHECK-DAG: id: [[Z1_ID:[0-9]+]], type: default, offset: 8, size: 8,
+; CHECK-DAG: id: [[Z2_ID:[0-9]+]], type: default, offset: 16, size: 8,
+; CHECK-DAG: id: [[Z3_ID:[0-9]+]], type: default, offset: 24, size: 8,
 ; CHECK: liveins: %d0, %d1, %d2, %s6, %s7, %s8
 ; CHECK: [[X0:%[0-9]+]](s64) = COPY %d0
 ; CHECK: [[X1:%[0-9]+]](s64) = COPY %d1
@@ -916,8 +908,8 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) {
 ; CHECK: fixedStack:
 ; The parameters live in separate stack locations, one for each element that
 ; doesn't fit in the registers.
-; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], offset: 0, size: 4
-; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], offset: 76, size: 4
+; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4,
+; CHECK-DAG: id: [[LAST_STACK_ID:[0-9]+]], type: default, offset: 76, size: 4
 ; CHECK: liveins: %r0, %r1, %r2, %r3
 ; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0
 ; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1
@@ -979,8 +971,8 @@ declare arm_aapcscc {i32, i32} @structs_target({i32, i32}, {i32*, float, i32, do
 define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x, {i32*, float, i32, double} %y) {
 ; CHECK-LABEL: test_structs
 ; CHECK: fixedStack:
-; CHECK-DAG: id: [[Y2_ID:[0-9]+]], offset: 0, size: 4
-; CHECK-DAG: id: [[Y3_ID:[0-9]+]], offset: 8, size: 8
+; CHECK-DAG: id: [[Y2_ID:[0-9]+]], type: default, offset: 0, size: 4,
+; CHECK-DAG: id: [[Y3_ID:[0-9]+]], type: default, offset: 8, size: 8,
 ; CHECK: liveins: %r0, %r1, %r2, %r3
 ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0
 ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1
diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
index 57ccff90c0bb..6ddc29a3bbba 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll
@@ -153,6 +153,87 @@ entry:
   ret i32 %sum
 }
 
+define i8 @test_and_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_and_i8:
+; CHECK: and r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = and i8 %x, %y
+  ret i8 %sum
+}
+
+define i16 @test_and_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_and_i16:
+; CHECK: and r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = and i16 %x, %y
+  ret i16 %sum
+}
+
+define i32 @test_and_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_and_i32:
+; CHECK: and r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = and i32 %x, %y
+  ret i32 %sum
+}
+
+define i8 @test_or_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_or_i8:
+; CHECK: orr r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = or i8 %x, %y
+  ret i8 %sum
+}
+
+define i16 @test_or_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_or_i16:
+; CHECK: orr r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = or i16 %x, %y
+  ret i16 %sum
+}
+
+define i32 @test_or_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_or_i32:
+; CHECK: orr r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = or i32 %x, %y
+  ret i32 %sum
+}
+
+define i8 @test_xor_i8(i8 %x, i8 %y) {
+; CHECK-LABEL: test_xor_i8:
+; CHECK: eor r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = xor i8 %x, %y
+  ret i8 %sum
+}
+
+define i16 @test_xor_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: test_xor_i16:
+; CHECK: eor r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = xor i16 %x, %y
+  ret i16 %sum
+}
+
+define i32 @test_xor_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: test_xor_i32:
+; CHECK: eor r0, r0, r1
+; CHECK: bx lr
+entry:
+  %sum = xor i32 %x, %y
+  ret i32 %sum
+}
+
 define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
 ; CHECK-LABEL: test_stack_args_i32:
 ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4
@@ -272,8 +353,7 @@ define arm_aapcscc double @test_double_softfp(double %f0, double %f1) {
 ; CHECK-DAG: vmov [[F0:d[0-9]+]], r0, r1
 ; CHECK-DAG: vmov [[F1:d[0-9]+]], r2, r3
 ; CHECK: vadd.f64 [[FV:d[0-9]+]], [[F0]], [[F1]]
-; CHECK: vmov.32 r0, [[FV]][0]
-; CHECK: vmov.32 r1, [[FV]][1]
+; CHECK: vmov r0, r1, [[FV]]
 ; CHECK: bx lr
 entry:
   %v = fadd double %f0, %f1
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
index d154b4887c19..803135ba595e 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir
@@ -82,10 +82,10 @@ body:             |
     %1(s32) = COPY %r1
     %2(s32) = COPY %r2
     %3(s32) = COPY %r3
-    ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
-    ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
-    %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
-    %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+    ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]]
+    ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]]
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
     ; CHECK: ADJCALLSTACKDOWN
     ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
     ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]]
@@ -97,8 +97,7 @@ body:             |
     ; HARD: BLX $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0
     ; CHECK: ADJCALLSTACKUP
     %6(s64) = G_FREM %4, %5
-    %7(s32) = G_EXTRACT %6(s64), 0
-    %8(s32) = G_EXTRACT %6(s64), 32
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
     %r0 = COPY %7(s32)
     %r1 = COPY %8(s32)
     BX_RET 14, _, implicit %r0, implicit %r1
@@ -174,10 +173,10 @@ body:             |
     %1(s32) = COPY %r1
     %2(s32) = COPY %r2
     %3(s32) = COPY %r3
-    ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
-    ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
-    %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
-    %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+    ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]]
+    ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]]
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
     ; CHECK: ADJCALLSTACKDOWN
     ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
     ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]]
@@ -189,8 +188,7 @@ body:             |
     ; HARD: BLX $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0
     ; CHECK: ADJCALLSTACKUP
     %6(s64) = G_FPOW %4, %5
-    %7(s32) = G_EXTRACT %6(s64), 0
-    %8(s32) = G_EXTRACT %6(s64), 32
+    %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64)
     %r0 = COPY %7(s32)
     %r1 = COPY %8(s32)
     BX_RET 14, _, implicit %r0, implicit %r1
@@ -258,10 +256,10 @@ body:             |
     %1(s32) = COPY %r1
     %2(s32) = COPY %r2
     %3(s32) = COPY %r3
-    ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]]
-    ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]]
-    %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
-    %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32
+    ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]]
+    ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]]
+    %4(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %5(s64) = G_MERGE_VALUES %2(s32), %3(s32)
     ; HARD: [[R:%[0-9]+]](s64) = G_FADD [[X]], [[Y]]
     ; SOFT: ADJCALLSTACKDOWN
     ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]]
@@ -272,10 +270,8 @@ body:             |
     ; SOFT-DEFAULT: BLX $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
     ; SOFT: ADJCALLSTACKUP
     %6(s64) = G_FADD %4, %5
-    ; HARD-DAG: G_EXTRACT [[R]](s64), 0
-    ; HARD-DAG: G_EXTRACT [[R]](s64), 32
-    %7(s32) = G_EXTRACT %6(s64), 0
-    %8(s32) = G_EXTRACT %6(s64), 32
+    ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64)
+    %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64)
     %r0 = COPY %7(s32)
     %r1 = COPY %8(s32)
     BX_RET 14, _, implicit %r0, implicit %r1
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
index f6ac92597cb2..c6f6ca81c279 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir
@@ -15,6 +15,18 @@
   define void @test_mul_s16() { ret void }
   define void @test_mul_s32() { ret void }
 
+  define void @test_and_s8() { ret void }
+  define void @test_and_s16() { ret void }
+  define void @test_and_s32() { ret void }
+
+  define void @test_or_s8() { ret void }
+  define void @test_or_s16() { ret void }
+  define void @test_or_s32() { ret void }
+
+  define void @test_xor_s8() { ret void }
+  define void @test_xor_s16() { ret void }
+  define void @test_xor_s32() { ret void }
+
   define void @test_load_from_stack() { ret void }
   define void @test_legal_loads() #0 { ret void }
   define void @test_legal_stores() #0 { ret void }
@@ -299,6 +311,234 @@ body:             |
     %r0 = COPY %2(s32)
     BX_RET 14, _, implicit %r0
 
+...
+---
+name:            test_and_s8
+# CHECK-LABEL: name: test_and_s8
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    %1(s8) = COPY %r1
+    %2(s8) = G_AND %0, %1
+    ; G_AND with s8 should widen
+    ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}}
+    ; CHECK-NOT: {{%[0-9]+}}(s8) = G_AND {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s8)
+    BX_RET 14, _, implicit %r0
+...
+---
+name:            test_and_s16
+# CHECK-LABEL: name: test_and_s16
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    %1(s16) = COPY %r1
+    %2(s16) = G_AND %0, %1
+    ; G_AND with s16 should widen
+    ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}}
+    ; CHECK-NOT: {{%[0-9]+}}(s16) = G_AND {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s16)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_and_s32
+# CHECK-LABEL: name: test_and_s32
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_AND %0, %1
+    ; G_AND with s32 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_or_s8
+# CHECK-LABEL: name: test_or_s8
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    %1(s8) = COPY %r1
+    %2(s8) = G_OR %0, %1
+    ; G_OR with s8 should widen
+    ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}}
+    ; CHECK-NOT: {{%[0-9]+}}(s8) = G_OR {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s8)
+    BX_RET 14, _, implicit %r0
+...
+---
+name:            test_or_s16
+# CHECK-LABEL: name: test_or_s16
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    %1(s16) = COPY %r1
+    %2(s16) = G_OR %0, %1
+    ; G_OR with s16 should widen
+    ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}}
+    ; CHECK-NOT: {{%[0-9]+}}(s16) = G_OR {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s16)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_or_s32
+# CHECK-LABEL: name: test_or_s32
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_OR %0, %1
+    ; G_OR with s32 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_xor_s8
+# CHECK-LABEL: name: test_xor_s8
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s8) = COPY %r0
+    %1(s8) = COPY %r1
+    %2(s8) = G_XOR %0, %1
+    ; G_XOR with s8 should widen
+    ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}}
+    ; CHECK-NOT: {{%[0-9]+}}(s8) = G_XOR {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s8)
+    BX_RET 14, _, implicit %r0
+...
+---
+name:            test_xor_s16
+# CHECK-LABEL: name: test_xor_s16
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s16) = COPY %r0
+    %1(s16) = COPY %r1
+    %2(s16) = G_XOR %0, %1
+    ; G_XOR with s16 should widen
+    ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}}
+    ; CHECK-NOT: {{%[0-9]+}}(s16) = G_XOR {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s16)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_xor_s32
+# CHECK-LABEL: name: test_xor_s32
+legalized:       false
+# CHECK: legalized: true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_XOR %0, %1
+    ; G_XOR with s32 is legal, so we should find it unchanged in the output
+    ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}}
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
 ...
 ---
 name:            test_load_from_stack
@@ -317,7 +557,7 @@ fixedStack:
   - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
   - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
   - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-  # CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8
+  # CHECK: id: [[FRAME_INDEX:[0-9]+]], type: default, offset: 8
 body:             |
   bb.0:
     liveins: %r0, %r1, %r2, %r3
diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
index dfccc47c277c..cc1df80c6019 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir
@@ -16,6 +16,10 @@
   define void @test_sdiv_s32() #1 { ret void }
   define void @test_udiv_s32() #1 { ret void }
 
+  define void @test_and_s32() { ret void}
+  define void @test_or_s32() { ret void}
+  define void @test_xor_s32() { ret void}
+
   define void @test_loads() #0 { ret void }
   define void @test_stores() #0 { ret void }
 
@@ -45,9 +49,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -71,12 +75,12 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -106,12 +110,12 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -141,12 +145,12 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -176,9 +180,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -202,12 +206,12 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -237,12 +241,12 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -272,9 +276,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -298,12 +302,12 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -333,12 +337,12 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -368,9 +372,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -394,9 +398,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -412,6 +416,84 @@ body:             |
     %r0 = COPY %2(s32)
     BX_RET 14, _, implicit %r0
 
+...
+---
+name:            test_and_s32
+# CHECK-LABEL: name: test_and_s32
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_AND %0, %1
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_or_s32
+# CHECK-LABEL: name: test_or_s32
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_OR %0, %1
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
+...
+---
+name:            test_xor_s32
+# CHECK-LABEL: name: test_xor_s32
+legalized:       true
+regBankSelected: false
+selected:        false
+# CHECK: registers:
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+body:             |
+  bb.0:
+    liveins: %r0, %r1
+
+    %0(s32) = COPY %r0
+    %1(s32) = COPY %r1
+    %2(s32) = G_XOR %0, %1
+    %r0 = COPY %2(s32)
+    BX_RET 14, _, implicit %r0
+
 ...
 ---
 name:            test_loads
@@ -420,13 +502,13 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
-# CHECK: - { id: 6, class: fprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
+# CHECK: - { id: 6, class: fprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -456,13 +538,13 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
-# CHECK: - { id: 5, class: gprb }
-# CHECK: - { id: 6, class: fprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
+# CHECK: - { id: 5, class: gprb, preferred-register: '' }
+# CHECK: - { id: 6, class: fprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -498,11 +580,11 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -531,9 +613,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -556,7 +638,7 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
 body:             |
@@ -572,8 +654,8 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -593,8 +675,8 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -614,8 +696,8 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -635,9 +717,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: fprb }
-# CHECK: - { id: 1, class: fprb }
-# CHECK: - { id: 2, class: fprb }
+# CHECK: - { id: 0, class: fprb, preferred-register: '' }
+# CHECK: - { id: 1, class: fprb, preferred-register: '' }
+# CHECK: - { id: 2, class: fprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -661,9 +743,9 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: fprb }
-# CHECK: - { id: 1, class: fprb }
-# CHECK: - { id: 2, class: fprb }
+# CHECK: - { id: 0, class: fprb, preferred-register: '' }
+# CHECK: - { id: 1, class: fprb, preferred-register: '' }
+# CHECK: - { id: 2, class: fprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -687,11 +769,11 @@ legalized:       true
 regBankSelected: false
 selected:        false
 # CHECK: registers:
-# CHECK: - { id: 0, class: gprb }
-# CHECK: - { id: 1, class: gprb }
-# CHECK: - { id: 2, class: fprb }
-# CHECK: - { id: 3, class: gprb }
-# CHECK: - { id: 4, class: gprb }
+# CHECK: - { id: 0, class: gprb, preferred-register: '' }
+# CHECK: - { id: 1, class: gprb, preferred-register: '' }
+# CHECK: - { id: 2, class: fprb, preferred-register: '' }
+# CHECK: - { id: 3, class: gprb, preferred-register: '' }
+# CHECK: - { id: 4, class: gprb, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -705,9 +787,8 @@ body:             |
 
     %0(s32) = COPY %r0
     %1(s32) = COPY %r1
-    %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32
-    %3(s32) = G_EXTRACT %2(s64), 0
-    %4(s32) = G_EXTRACT %2(s64), 32
+    %2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
+    %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
     %r0 = COPY %3(s32)
     %r1 = COPY %4(s32)
     BX_RET 14, _, implicit %r0, implicit %r1
diff --git a/test/CodeGen/ARM/clang-section.ll b/test/CodeGen/ARM/clang-section.ll
new file mode 100644
index 000000000000..343f0e721d7f
--- /dev/null
+++ b/test/CodeGen/ARM/clang-section.ll
@@ -0,0 +1,140 @@
+;RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s
+;Test that global variables and functions are assigned to correct sections.
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-arm-none-eabi"
+
+@a = global i32 0, align 4 #0
+@b = global i32 1, align 4 #0
+@c = global [4 x i32] zeroinitializer, align 4 #0
+@d = global [5 x i16] zeroinitializer, align 2 #0
+@e = global [6 x i16] [i16 0, i16 0, i16 1, i16 0, i16 0, i16 0], align 2 #0
+@f = constant i32 2, align 4 #0
+@h = global i32 0, align 4 #1
+@i = global i32 0, align 4 #2
+@j = constant i32 4, align 4 #2
+@k = global i32 0, align 4 #2
+@_ZZ3gooE7lstat_h = internal global i32 0, align 4 #2
+@_ZL1g = internal global [2 x i32] zeroinitializer, align 4 #0
+@l = global i32 5, align 4 #3
+@m = constant i32 6, align 4 #3
+@n = global i32 0, align 4
+@o = global i32 6, align 4
+@p = constant i32 7, align 4
+
+; Function Attrs: noinline nounwind
+define i32 @foo() #4 {
+entry:
+  %0 = load i32, i32* @b, align 4
+  ret i32 %0
+}
+
+; Function Attrs: noinline
+define i32 @goo() #5 {
+entry:
+  %call = call i32 @zoo(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZL1g, i32 0, i32 0), i32* @_ZZ3gooE7lstat_h)
+  ret i32 %call
+}
+
+declare i32 @zoo(i32*, i32*) #6
+
+; Function Attrs: noinline nounwind
+define i32 @hoo() #7 {
+entry:
+  %0 = load i32, i32* @b, align 4
+  ret i32 %0
+}
+
+attributes #0 = { "bss-section"="my_bss.1" "data-section"="my_data.1" "rodata-section"="my_rodata.1" }
+attributes #1 = { "data-section"="my_data.1" "rodata-section"="my_rodata.1" }
+attributes #2 = { "bss-section"="my_bss.2" "rodata-section"="my_rodata.1" }
+attributes #3 = { "bss-section"="my_bss.2" "data-section"="my_data.2" "rodata-section"="my_rodata.2" }
+attributes #4 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.1" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.2" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #7 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"static_rwdata", i32 1}
+!2 = !{i32 1, !"enumsize_buildattr", i32 2}
+!3 = !{i32 1, !"armlib_unavailable", i32 0}
+
+;CHECK: 	.section	my_text.1,"ax",%progbits
+;CHECK: 	.type	foo,%function
+;CHECK: foo:
+
+;CHECK: 	.section	my_text.2,"ax",%progbits
+;CHECK: 	.type	goo,%function
+;CHECK: goo:
+
+;CHECK: 	.text
+;CHECK: 	.type	hoo,%function
+;CHECK: hoo:
+
+;CHECK: 	.type	a,%object
+;CHECK: 	.section	my_bss.1,"aw",%nobits
+;CHECK: a:
+
+;CHECK: 	.type	b,%object
+;CHECK: 	.section	my_data.1,"aw",%progbits
+;CHECK: b:
+
+;CHECK: 	.type	c,%object
+;CHECK: 	.section	my_bss.1,"aw",%nobits
+;CHECK: c:
+
+;CHECK: 	.type	d,%object
+;CHECK: d:
+
+;CHECK: 	.type	e,%object
+;CHECK: 	.section	my_data.1,"aw",%progbits
+;CHECK: e:
+
+;CHECK: 	.type	f,%object
+;CHECK: 	.section	my_rodata.1,"a",%progbits
+;CHECK: f:
+
+;CHECK: 	.type	h,%object
+;CHECK: 	.bss
+;CHECK: h:
+
+;CHECK: 	.type	i,%object
+;CHECK: 	.section	my_bss.2,"aw",%nobits
+;CHECK: i:
+
+;CHECK: 	.type	j,%object
+;CHECK: 	.section	my_rodata.1,"a",%progbits
+;CHECK: j:
+
+;CHECK: 	.type	k,%object
+;CHECK: 	.section	my_bss.2,"aw",%nobits
+;CHECK: k:
+
+;CHECK: 	.type	_ZZ3gooE7lstat_h,%object @ @_ZZ3gooE7lstat_h
+;CHECK: _ZZ3gooE7lstat_h:
+
+;CHECK: 	.type	_ZL1g,%object
+;CHECK: 	.section	my_bss.1,"aw",%nobits
+;CHECK: _ZL1g:
+
+;CHECK: 	.type	l,%object
+;CHECK: 	.section	my_data.2,"aw",%progbits
+;CHECK: l:
+
+;CHECK: 	.type	m,%object
+;CHECK: 	.section	my_rodata.2,"a",%progbits
+;CHECK: m:
+
+;CHECK: 	.type	n,%object
+;CHECK: 	.bss
+;CHECK: n:
+
+;CHECK: 	.type	o,%object
+;CHECK: 	.data
+;CHECK: o:
+
+;CHECK: 	.type	p,%object
+;CHECK: 	.section	.rodata,"a",%progbits
+;CHECK: p:
diff --git a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
index a9223e1e2a99..5f914323861a 100644
--- a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
+++ b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST
 ; Check latencies of vmul/vfma accumulate chains.
 
 define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
@@ -14,7 +15,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
 ; > VMULS read-advanced latency to VMLAS = 0
 ; CHECK-SAME:  Latency=0
 
-; CHECK:       VMLAS
+; CHECK-DEFAULT: VMLAS
+; CHECK-FAST:    VFMAS
 ; > VMLAS common latency = 9
 ; CHECK:       Latency            : 9
 ; CHECK:       Successors:
@@ -22,7 +24,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
 ; > VMLAS read-advanced latency to the next VMLAS = 4
 ; CHECK-SAME:  Latency=4
 
-; CHECK:       VMLAS
+; CHECK-DEFAULT: VMLAS
+; CHECK-FAST:    VFMAS
 ; CHECK:       Latency            : 9
 ; CHECK:       Successors:
 ; CHECK:       data
@@ -51,7 +54,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
 ; VMULfd read-advanced latency to VMLAfd = 0
 ; CHECK-SAME:  Latency=0
 
-; CHECK:       VMLAfd
+; CHECK-DEFAULT: VMLAfd
+; CHECK-FAST:    VFMAfd
 ; > VMLAfd common latency = 9
 ; CHECK:       Latency            : 9
 ; CHECK:       Successors:
@@ -59,7 +63,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
 ; > VMLAfd read-advanced latency to the next VMLAfd = 4
 ; CHECK-SAME:  Latency=4
 
-; CHECK:       VMLAfd
+; CHECK-DEFAULT: VMLAfd
+; CHECK-FAST:    VFMAfd
 ; CHECK:       Latency            : 9
 ; CHECK:       Successors:
 ; CHECK:       data
@@ -75,3 +80,79 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
   ret <2 x float> %add2
 }
 
+define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
+; CHECK:       ********** MI Scheduling **********
+; CHECK:       Test3:BB#0
+
+; CHECK:       VMULS
+; > VMULS common latency = 5
+; CHECK:       Latency            : 5
+; CHECK:       Successors:
+; CHECK:       data
+; > VMULS read-advanced latency to VMLSS = 0
+; CHECK-SAME:  Latency=0
+
+; CHECK-DEFAULT: VMLSS
+; CHECK-FAST:    VFMSS
+; > VMLSS common latency = 9
+; CHECK:       Latency            : 9
+; CHECK:       Successors:
+; CHECK:       data
+; > VMLSS read-advanced latency to the next VMLSS = 4
+; CHECK-SAME:  Latency=4
+
+; CHECK-DEFAULT: VMLSS
+; CHECK-FAST:    VFMSS
+; CHECK:       Latency            : 9
+; CHECK:       Successors:
+; CHECK:       data
+; > VMLSS not-optimized latency to VMOVRS = 9
+; CHECK-SAME:  Latency=9
+
+; f1 * f2 - f3 * f4 - f5 * f6  ==>  VMULS, VMLSS, VMLSS
+  %mul1 = fmul float %f1, %f2
+  %mul2 = fmul float %f3, %f4
+  %mul3 = fmul float %f5, %f6
+  %sub1 = fsub float %mul1, %mul2
+  %sub2 = fsub float %sub1, %mul3
+  ret float %sub2
+}
+
+; ASIMD form
+define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) {
+; CHECK:       ********** MI Scheduling **********
+; CHECK:       Test4:BB#0
+
+; CHECK:       VMULfd
+; > VMULfd common latency = 5
+; CHECK:       Latency            : 5
+; CHECK:       Successors:
+; CHECK:       data
+; VMULfd read-advanced latency to VMLSfd = 0
+; CHECK-SAME:  Latency=0
+
+; CHECK-DEFAULT: VMLSfd
+; CHECK-FAST:    VFMSfd
+; > VMLSfd common latency = 9
+; CHECK:       Latency            : 9
+; CHECK:       Successors:
+; CHECK:       data
+; > VMLSfd read-advanced latency to the next VMLSfd = 4
+; CHECK-SAME:  Latency=4
+
+; CHECK-DEFAULT: VMLSfd
+; CHECK-FAST:    VFMSfd
+; CHECK:       Latency            : 9
+; CHECK:       Successors:
+; CHECK:       data
+; > VMLSfd not-optimized latency to VMOVRRD = 9
+; CHECK-SAME:  Latency=9
+
+; f1 * f2 - f3 * f4 - f5 * f6  ==>  VMULfd, VMLSfd, VMLSfd
+  %mul1 = fmul <2 x float> %f1, %f2
+  %mul2 = fmul <2 x float> %f3, %f4
+  %mul3 = fmul <2 x float> %f5, %f6
+  %sub1 = fsub <2 x float> %mul1, %mul2
+  %sub2 = fsub <2 x float> %sub1, %mul3
+  ret <2 x float> %sub2
+}
diff --git a/test/CodeGen/ARM/invalidated-save-point.ll b/test/CodeGen/ARM/invalidated-save-point.ll
index 0ff153b6799d..bb602308a179 100644
--- a/test/CodeGen/ARM/invalidated-save-point.ll
+++ b/test/CodeGen/ARM/invalidated-save-point.ll
@@ -4,8 +4,8 @@
 ; this point. Notably, if it isn't is will be invalid and reference a
 ; deleted block (%bb.-1.if.end)
 
-; CHECK-NOT: savePoint:
-; CHECK-NOT: restorePoint:
+; CHECK: savePoint: ''
+; CHECK: restorePoint: ''
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv7"
diff --git a/test/CodeGen/Generic/llc-start-stop.ll b/test/CodeGen/Generic/llc-start-stop.ll
index 7508f94c50a9..49407fbb2d88 100644
--- a/test/CodeGen/Generic/llc-start-stop.ll
+++ b/test/CodeGen/Generic/llc-start-stop.ll
@@ -10,12 +10,12 @@
 ; STOP-BEFORE-NOT: Loop Strength Reduction
 
 ; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-AFTER
-; START-AFTER: -machine-branch-prob -pre-isel-intrinsic-lowering
+; START-AFTER: -machine-branch-prob -gc-lowering
 ; START-AFTER: FunctionPass Manager
 ; START-AFTER-NEXT: Lower Garbage Collection Instructions
 
 ; RUN: llc < %s -debug-pass=Structure -start-before=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START-BEFORE
-; START-BEFORE: -machine-branch-prob -pre-isel-intrinsic-lowering
+; START-BEFORE: -machine-branch-prob -domtree
 ; START-BEFORE: FunctionPass Manager
 ; START-BEFORE: Loop Strength Reduction
 ; START-BEFORE-NEXT: Lower Garbage Collection Instructions
diff --git a/test/CodeGen/Hexagon/common-gep-inbounds.ll b/test/CodeGen/Hexagon/common-gep-inbounds.ll
new file mode 100644
index 000000000000..a8b75725a0b8
--- /dev/null
+++ b/test/CodeGen/Hexagon/common-gep-inbounds.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=hexagon -debug-only=commgep 2>&1 < %s | FileCheck %s
+; REQUIRES: asserts
+
+; We should generate new GEPs with "inbounds" flag.
+; CHECK: new GEP:{{.*}}inbounds
+; CHECK: new GEP:{{.*}}inbounds
+
+target triple = "hexagon"
+
+%struct.0 = type { i16, i16 }
+
+; Function Attrs: nounwind
+define i16 @TraceBack() #0 {
+entry:
+  %p = getelementptr inbounds %struct.0, %struct.0* undef, i32 0, i32 0
+  %a = load i16, i16* %p
+  ret i16 %a
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" }
diff --git a/test/CodeGen/Hexagon/mux-undef.ll b/test/CodeGen/Hexagon/mux-undef.ll
new file mode 100644
index 000000000000..3780a329b1eb
--- /dev/null
+++ b/test/CodeGen/Hexagon/mux-undef.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
+;
+; Make sure this test compiles successfully.
+; CHECK: jumpr r31
+
+target triple = "hexagon--elf"
+
+; Function Attrs: nounwind
+define i32 @fred() #0 {
+b0:
+  call void @foo() #0
+  br label %b1
+
+b1:                                               ; preds = %b0
+  br i1 undef, label %b2, label %b3
+
+b2:                                               ; preds = %b1
+  br label %b3
+
+b3:                                               ; preds = %b2, %b1
+  %v4 = phi i32 [ 1, %b1 ], [ 2, %b2 ]
+  ret i32 %v4
+}
+
+declare void @foo() #0
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" }
diff --git a/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir b/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir
index d63c2ef6e871..af785bcb10a9 100644
--- a/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir
+++ b/test/CodeGen/MIR/AArch64/generic-virtual-registers-error.mir
@@ -17,6 +17,5 @@ body: |
     liveins: %w0
     ; ERR: generic virtual registers must have a type
     ; ERR-NEXT: %0
-    ; ERR: Unable to initialize machine function
     %0 = G_ADD i32 %w0, %w0
 ...
diff --git a/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir b/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir
index e331179773d6..f177b91da559 100644
--- a/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir
+++ b/test/CodeGen/MIR/AArch64/generic-virtual-registers-with-regbank-error.mir
@@ -18,6 +18,5 @@ body: |
     liveins: %w0
     ; ERR: generic virtual registers must have a type
     ; ERR-NEXT: %0
-    ; ERR: Unable to initialize machine function
     %0 = G_ADD i32 %w0, %w0
 ...
diff --git a/test/CodeGen/MIR/AArch64/register-operand-bank.mir b/test/CodeGen/MIR/AArch64/register-operand-bank.mir
index d48495167f15..d2f99933a35a 100644
--- a/test/CodeGen/MIR/AArch64/register-operand-bank.mir
+++ b/test/CodeGen/MIR/AArch64/register-operand-bank.mir
@@ -7,8 +7,8 @@
 ---
 # CHECK-LABEL: name: func
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: fpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: fpr, preferred-register: '' }
 name: func
 body: |
   bb.0:
diff --git a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
index fc0c4ce8c07f..cfb3aef5fb0f 100644
--- a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
+++ b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
@@ -25,7 +25,9 @@ frameInfo:
   maxAlignment:    8
 # CHECK-LABEL: stack_local
 # CHECK: stack:
-# CHECK-NEXT: { id: 0, name: local_var, offset: 0, size: 8, alignment: 8, local-offset: -8 }
+# CHECK-NEXT: { id: 0, name: local_var, type: default, offset: 0, size: 8, alignment: 8,
+# CHECK-NEXT: callee-saved-register: '', local-offset: -8, di-variable: '', di-expression: '',
+# CHECK-NEXT: di-location: '' }
 stack:
   - { id: 0,name: local_var,offset: 0,size: 8,alignment: 8, local-offset: -8 }
 body: |
diff --git a/test/CodeGen/MIR/Generic/frame-info.mir b/test/CodeGen/MIR/Generic/frame-info.mir
index 157eb99e149e..a467bfa3a1af 100644
--- a/test/CodeGen/MIR/Generic/frame-info.mir
+++ b/test/CodeGen/MIR/Generic/frame-info.mir
@@ -36,9 +36,13 @@ tracksRegLiveness: true
 # CHECK-NEXT: maxAlignment:
 # CHECK-NEXT: adjustsStack: false
 # CHECK-NEXT: hasCalls: false
+# CHECK-NEXT: stackProtector:  ''
+# CHECK-NEXT: maxCallFrameSize:
 # CHECK-NEXT: hasOpaqueSPAdjustment: false
 # CHECK-NEXT: hasVAStart: false
 # CHECK-NEXT: hasMustTailInVarArgFunc: false
+# CHECK-NEXT: savePoint:       ''
+# CHECK-NEXT: restorePoint:    ''
 # CHECK: body
 frameInfo:
   maxAlignment:    4
@@ -61,6 +65,7 @@ tracksRegLiveness: true
 # CHECK-NEXT: maxAlignment:
 # CHECK-NEXT: adjustsStack: true
 # CHECK-NEXT: hasCalls: true
+# CHECK-NEXT: stackProtector:  ''
 # CHECK-NEXT: maxCallFrameSize: 4
 # CHECK-NEXT: hasOpaqueSPAdjustment: true
 # CHECK-NEXT: hasVAStart: true
diff --git a/test/CodeGen/MIR/Generic/function-missing-machine-function.mir b/test/CodeGen/MIR/Generic/function-missing-machine-function.mir
deleted file mode 100644
index f3a834801671..000000000000
--- a/test/CodeGen/MIR/Generic/function-missing-machine-function.mir
+++ /dev/null
@@ -1,13 +0,0 @@
-# RUN: not llc -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
-# This test verifies that an error is reported when a MIR file has some
-# function but is missing a corresponding machine function.
-
-# CHECK: no machine function information for function 'foo' in the MIR file
-
---- |
-
-  define i32 @foo() {
-    ret i32 0
-  }
-
-...
diff --git a/test/CodeGen/MIR/X86/callee-saved-info.mir b/test/CodeGen/MIR/X86/callee-saved-info.mir
index 883f6fdb0d22..6920611019b9 100644
--- a/test/CodeGen/MIR/X86/callee-saved-info.mir
+++ b/test/CodeGen/MIR/X86/callee-saved-info.mir
@@ -50,12 +50,12 @@ frameInfo:
   adjustsStack:    true
   hasCalls:        true
 # CHECK: fixedStack:
-# CHECK-NEXT: , callee-saved-register: '%rbx' }
+# CHECK: , callee-saved-register: '%rbx' }
 fixedStack:
   - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%rbx' }
 # CHECK: stack:
 # CHECK-NEXT: - { id: 0
-# CHECK-NEXT: , callee-saved-register: '%edi' }
+# CHECK: callee-saved-register: '%edi'
 stack:
   - { id: 0, name: b, offset: -20, size: 4, alignment: 4 }
   - { id: 1, offset: -24, size: 4, alignment: 4, callee-saved-register: '%edi' }
diff --git a/test/CodeGen/MIR/X86/empty0.mir b/test/CodeGen/MIR/X86/empty0.mir
new file mode 100644
index 000000000000..4431af7c6a99
--- /dev/null
+++ b/test/CodeGen/MIR/X86/empty0.mir
@@ -0,0 +1,6 @@
+# RUN: llc -run-pass none -o - %s | FileCheck %s
+# Make sure empty files don't crash us
+# CHECK: --- |
+# ... moduleid, sourcefilename stuff here ..
+# CHECK: target datalayout =
+# CHECK: ...
diff --git a/test/CodeGen/MIR/X86/empty1.mir b/test/CodeGen/MIR/X86/empty1.mir
new file mode 100644
index 000000000000..d80b0cd30231
--- /dev/null
+++ b/test/CodeGen/MIR/X86/empty1.mir
@@ -0,0 +1,8 @@
+# RUN: llc -run-pass none -o - %s | FileCheck %s
+# Make sure empty files don't crash us
+--- |
+...
+# CHECK: --- |
+# ... moduleid, sourcefilename stuff here ..
+# CHECK: target datalayout =
+# CHECK: ...
diff --git a/test/CodeGen/MIR/X86/empty2.mir b/test/CodeGen/MIR/X86/empty2.mir
new file mode 100644
index 000000000000..7495807cd4d6
--- /dev/null
+++ b/test/CodeGen/MIR/X86/empty2.mir
@@ -0,0 +1,8 @@
+# RUN: llc -run-pass none -o - %s | FileCheck %s
+# Make sure empty files don't crash us
+---
+...
+# CHECK: --- |
+# ... moduleid, sourcefilename stuff here ..
+# CHECK: target datalayout =
+# CHECK: ...
diff --git a/test/CodeGen/MIR/X86/fixed-stack-objects.mir b/test/CodeGen/MIR/X86/fixed-stack-objects.mir
index a7ecac841a64..c87cb0b49f93 100644
--- a/test/CodeGen/MIR/X86/fixed-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/fixed-stack-objects.mir
@@ -20,7 +20,7 @@ frameInfo:
   stackSize:       4
   maxAlignment:    4
 # CHECK: fixedStack:
-# CHECK-NEXT: - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+# CHECK-NEXT: - { id: 0, type: default, offset: 0, size: 4, alignment: 4, isImmutable: true,
 fixedStack:
   - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
 stack:
diff --git a/test/CodeGen/MIR/X86/generic-instr-type.mir b/test/CodeGen/MIR/X86/generic-instr-type.mir
index b9e47cdf6192..78951de70a3c 100644
--- a/test/CodeGen/MIR/X86/generic-instr-type.mir
+++ b/test/CodeGen/MIR/X86/generic-instr-type.mir
@@ -19,11 +19,11 @@
 ---
 name:            test_vregs
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: _ }
-# CHECK-NEXT:   - { id: 1, class: _ }
-# CHECK-NEXT:   - { id: 2, class: _ }
-# CHECK-NEXT:   - { id: 3, class: _ }
-# CHECK-NEXT:   - { id: 4, class: _ }
+# CHECK-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/MIR/X86/inline-asm.mir b/test/CodeGen/MIR/X86/inline-asm.mir
new file mode 100644
index 000000000000..be96517144b0
--- /dev/null
+++ b/test/CodeGen/MIR/X86/inline-asm.mir
@@ -0,0 +1,12 @@
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass none | FileCheck %s
+---
+# Avoid crash/assert when using an emptystring in an INLINEASM.
+# CHECK-LABEL: name: emptystring
+# CHECK: bb.0:
+# CHECK:   INLINEASM $"", 1
+# CHECK:   RET 0
+name: emptystring
+body: |
+  bb.0:
+    INLINEASM $"", 1
+    RET 0
diff --git a/test/CodeGen/MIR/X86/register-operand-class.mir b/test/CodeGen/MIR/X86/register-operand-class.mir
index 63019daad7a1..abdcda2a077b 100644
--- a/test/CodeGen/MIR/X86/register-operand-class.mir
+++ b/test/CodeGen/MIR/X86/register-operand-class.mir
@@ -1,4 +1,4 @@
-# RUN: llc -o - %s -march=x86-64 -run-pass none | FileCheck %s
+# RUN: llc  -o - %s -march=x86-64 -run-pass none | FileCheck %s
 # Test various aspects of register class specification on machine operands.
 --- |
   define void @func() { ret void }
@@ -6,11 +6,11 @@
 ---
 # CHECK-LABEL: name: func
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gr32 }
-# CHECK:   - { id: 1, class: gr64 }
-# CHECK:   - { id: 2, class: gr32 }
-# CHECK:   - { id: 3, class: gr16 }
-# CHECK:   - { id: 4, class: _ }
+# CHECK:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK:   - { id: 1, class: gr64, preferred-register: '' }
+# CHECK:   - { id: 2, class: gr32, preferred-register: '' }
+# CHECK:   - { id: 3, class: gr16, preferred-register: '' }
+# CHECK:   - { id: 4, class: _, preferred-register: '' }
 name: func
 body: |
   bb.0:
diff --git a/test/CodeGen/MIR/X86/roundtrip.mir b/test/CodeGen/MIR/X86/roundtrip.mir
new file mode 100644
index 000000000000..c697f7306041
--- /dev/null
+++ b/test/CodeGen/MIR/X86/roundtrip.mir
@@ -0,0 +1,20 @@
+# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=none | llc -o - -x mir - -mtriple=x86_64-- -run-pass=none | FileCheck %s
+---
+# CHECK-LABEL: name: func0
+# CHECK: registers:
+# CHECK:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK: body: |
+# CHECK:   bb.0:
+# CHECK:     %0 = MOV32r0 implicit-def %eflags
+# CHECK:     dead %1 = COPY %0
+# CHECK:     MOV32mr undef %rcx, 1, _, 0, _, killed %0 :: (volatile store 4)
+# CHECK:     RETQ undef %eax
+name: func0
+body: |
+  bb.0:
+    %0 : gr32 = MOV32r0 implicit-def %eflags
+    dead %1 : gr32 = COPY %0
+    MOV32mr undef %rcx, 1, _, 0, _, killed %0 :: (volatile store 4)
+    RETQ undef %eax
+...
diff --git a/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
index 27ca266f7794..310fa6a1c53b 100644
--- a/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
+++ b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
@@ -15,7 +15,7 @@
 name:            test
 tracksRegLiveness: true
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gr32 }
+# CHECK-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
 # CHECK-NEXT:  - { id: 1, class: gr32, preferred-register: '%esi' }
 # CHECK-NEXT:  - { id: 2, class: gr32, preferred-register: '%edi' }
 registers:
diff --git a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
index 1771d6fafcae..d3c422362848 100644
--- a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
@@ -19,7 +19,7 @@ name:            test
 frameInfo:
   maxAlignment:    4
 # CHECK: fixedStack:
-# CHECK-NEXT: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 }
+# CHECK-NEXT: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4, callee-saved-register: '' }
 fixedStack:
   - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 }
 stack:
diff --git a/test/CodeGen/MIR/X86/stack-object-debug-info.mir b/test/CodeGen/MIR/X86/stack-object-debug-info.mir
index a893b0836a62..445d1bd3f1fd 100644
--- a/test/CodeGen/MIR/X86/stack-object-debug-info.mir
+++ b/test/CodeGen/MIR/X86/stack-object-debug-info.mir
@@ -51,8 +51,9 @@ frameInfo:
   maxAlignment:    16
 # CHECK-LABEL: foo
 # CHECK: stack:
-# CHECK:  - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4',
-# CHECK-NEXT: di-expression: '!10', di-location: '!11' }
+# CHECK:  - { id: 0, name: y.i, type: default, offset: 0, size: 256, alignment: 16,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '!4', di-expression: '!10',
+# CHECK-NEXT: di-location: '!11' }
 stack:
   - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4',
       di-expression: '!7', di-location: '!8' }
diff --git a/test/CodeGen/MIR/X86/stack-objects.mir b/test/CodeGen/MIR/X86/stack-objects.mir
index 08b9ec0b4347..608202ec5dcc 100644
--- a/test/CodeGen/MIR/X86/stack-objects.mir
+++ b/test/CodeGen/MIR/X86/stack-objects.mir
@@ -21,9 +21,12 @@ name:            test
 frameInfo:
   maxAlignment:    8
 # CHECK: stack:
-# CHECK-NEXT: - { id: 0, name: b, offset: -12, size: 4, alignment: 4 }
-# CHECK-NEXT: - { id: 1, name: x, offset: -24, size: 8, alignment: 8 }
-# CHECK-NEXT: - { id: 2, type: spill-slot, offset: -32, size: 4, alignment: 4 }
+# CHECK-NEXT: - { id: 0, name: b, type: default, offset: -12, size: 4, alignment: 4,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 1, name: x, type: default, offset: -24, size: 8, alignment: 8,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4,
+# CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
 stack:
   - { id: 0, name: b, offset: -12, size: 4, alignment: 4 }
   - { id: 1, name: x, offset: -24, size: 8, alignment: 8 }
diff --git a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
index 5e7d99352e57..95efd977d9c6 100644
--- a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
@@ -24,9 +24,11 @@ frameInfo:
   maxAlignment:    8
   adjustsStack:    true
 # CHECK: stack:
-# CHECK-NEXT: - { id: 0, offset: -20, size: 4, alignment: 4 }
-# CHECK-NEXT: - { id: 1, offset: -32, size: 8, alignment: 8 }
-# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1 }
+# CHECK-NEXT: - { id: 0, name: '', type: default, offset: -20, size: 4, alignment: 4,
+# CHECK-NEXT:  callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 1, name: '', type: default, offset: -32, size: 8, alignment: 8,
+# CHECK-NEXT:  callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' }
+# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1,
 stack:
   - { id: 0, offset: -20, size: 4, alignment: 4 }
   - { id: 1, offset: -32, size: 8, alignment: 8 }
diff --git a/test/CodeGen/MIR/X86/virtual-registers.mir b/test/CodeGen/MIR/X86/virtual-registers.mir
index e63bcf4acdd1..0d181f895aa9 100644
--- a/test/CodeGen/MIR/X86/virtual-registers.mir
+++ b/test/CodeGen/MIR/X86/virtual-registers.mir
@@ -33,9 +33,9 @@
 name:            bar
 tracksRegLiveness: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gr32 }
   - { id: 1, class: gr32 }
@@ -67,9 +67,9 @@ name:            foo
 tracksRegLiveness: true
 # CHECK: name: foo
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 2, class: gr32 }
   - { id: 0, class: gr32 }
diff --git a/test/CodeGen/Mips/biggot.ll b/test/CodeGen/Mips/biggot.ll
index 3acfa372a905..b266b5e05e21 100644
--- a/test/CodeGen/Mips/biggot.ll
+++ b/test/CodeGen/Mips/biggot.ll
@@ -1,6 +1,9 @@
 ; RUN: llc -march=mipsel -mxgot -relocation-model=pic < %s | FileCheck %s -check-prefix=O32
 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mxgot -relocation-model=pic < %s | \
 ; RUN: FileCheck %s -check-prefix=N64
+; RUN: llc -march=mipsel -mxgot -relocation-model=pic -fast-isel < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mxgot -relocation-model=pic -fast-isel < %s | \
+; RUN: FileCheck %s -check-prefix=N64
 
 @v0 = external global i32
 
diff --git a/test/CodeGen/Mips/cconv/vector.ll b/test/CodeGen/Mips/cconv/vector.ll
new file mode 100644
index 000000000000..5a88d064fe73
--- /dev/null
+++ b/test/CodeGen/Mips/cconv/vector.ll
@@ -0,0 +1,1657 @@
+; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+
+
+
+; Test that vector types are passed through the integer register set whether or
+; not MSA is enabled. This is an ABI requirement for MIPS. For GCC compatibility
+; we need to handle any power of 2 number of elements. We will test this
+; exhaustively for combinations up to MSA register (128 bits) size.
+
+; First set of tests are for argument passing.
+
+define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
+; ALL-LABEL: i8_2:
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32EL: addu $1, $4, $5
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <2 x i8> %a, %b
+  ret <2 x i8> %1
+}
+
+; Test that vectors spilled to the outgoing argument area have the expected
+; offset from $sp.
+
+define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d,
+                        <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
+entry:
+
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24
+
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280
+
+; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 17($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp)
+
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp)
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48
+
+; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp)
+
+  %0 = add <2 x i8> %a, %b
+  %1 = add <2 x i8> %0, %c
+  %2 = add <2 x i8> %1, %d
+  %3 = add <2 x i8> %2, %e
+  %4 = add <2 x i8> %3, %f
+  %5 = add <2 x i8> %4, %g
+  ret <2 x i8> %5
+}
+
+define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
+; ALL-LABEL: i8_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+  %1 = add <4 x i8> %a, %b
+  ret <4 x i8> %1
+}
+
+define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
+; ALL-LABEL: i8_8:
+; MIPS32-NOT: lw
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <8 x i8> %a, %b
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) {
+; ALL-LABEL: i8_16:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <16 x i8> %a, %b
+
+  ret <16 x i8> %1
+}
+
+define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
+; ALL-LABEL: i16_2:
+; MIPS32: addu    $[[R0:[0-9]+]], $4, $5
+; MIPS32: andi    $[[R1:[0-9]+]], $[[R0]], 65535
+; MIPS32: srl     $[[R2:[0-9]+]], $5, 16
+; MIPS32: srl     $[[R3:[0-9]+]], $4, 16
+; MIPS32: addu    $[[R4:[0-9]+]], $[[R3]], $[[R2]]
+; MIPS32: sll     $2, $[[R4]], 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+  %1 = add <2 x i16> %a, %b
+  ret <2 x i16> %1
+}
+
+define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
+; ALL-LABEL: i16_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <4 x i16> %a, %b
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @i16_8(<8 x i16> %a, <8 x i16> %b) {
+; ALL-LABEL: i16_8:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <8 x i16> %a, %b
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
+; ALL-LABEL: i32_2:
+; MIPS32-DAG: addu    $2, $4, $6
+; MIPS32-DAG: addu    $3, $5, $7
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = add <2 x i32> %a, %b
+
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: i32_4:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: sll     ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: sll     ${{[0-9]+}}, $7, 0
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl    ${{[0-9]+}}, $7, 32
+  %1 = add <4 x i32> %a, %b
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: i64_2:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: daddu $2, $4, $6
+; MIPS64-DAG: daddu $3, $5, $7
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = add <2 x i64> %a, %b
+  ret <2 x i64> %1
+}
+
+; The MIPS vector ABI treats vectors of floats differently to vectors of
+; integers.
+
+; For arguments, floating point vectors are bitcasted to integer vectors whose
+; elements are of GPR width and where the element count is deduced from
+; the length of the floating point vector divided by the size of the GPRs.
+
+; For returns, integer vectors are passed via the GPR register set, but
+; floating point vectors are returned via a hidden sret pointer.
+
+; For testing purposes we skip returning values here and test them below
+; instead.
+@float_res_v2f32 = external global <2 x float>
+
+define void @float_2(<2 x float> %a, <2 x float> %b) {
+; ALL-LABEL: float_2:
+; MIPS32: mtc1 $7, $f[[F0:[0-9]+]]
+; MIPS32: mtc1 $5, $f[[F1:[0-9]+]]
+; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
+; MIPS32: swc1 $f[[F2]]
+; MIPS32: mtc1 $6, $f[[F3:[0-9]+]]
+; MIPS32: mtc1 $4, $f[[F4:[0-9]+]]
+; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]]
+; MIPS32: swc1 $f[[F5]]
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+  %1 = fadd <2 x float> %a, %b
+  store <2 x float> %1, <2 x float> * @float_res_v2f32
+  ret void
+}
+
+@float_res_v4f32 = external global <4 x float>
+
+; For MSA this case is suboptimal, the 4 loads can be combined into a single
+; ld.w.
+
+define void @float_4(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: float_4:
+; MIPS32-DAG: mtc1 $4
+; MIPS32-DAG: mtc1 $5
+; MIPS32-DAG: mtc1 $6
+; MIPS32-DAG: mtc1 $7
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0
+; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32
+; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0
+; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = fadd <4 x float> %a, %b
+  store <4 x float> %1, <4 x float> * @float_res_v4f32
+  ret void
+}
+
+@double_v2f64 = external global <2 x double>
+
+define void @double_2(<2 x double> %a, <2 x double> %b) {
+; ALL-LABEL: double_2:
+; MIPS32-DAG: sw $7
+; MIPS32-DAG: sw $6
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32:     add.d
+; MIPS32-DAG: sw $5
+; MIPS32-DAG: sw $4
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32:     add.d
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]]
+; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]]
+; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]]
+; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]]
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+  %1 = fadd <2 x double> %a, %b
+  store <2 x double> %1, <2 x double> * @double_v2f64
+  ret void
+}
+
+; Return value testing.
+; Integer vectors are returned in $2, $3, $4, $5 for O32, $2, $3 for N32/N64
+; Floating point vectors are returned through a hidden sret pointer.
+
+@gv2i8 = global <2 x i8> <i8 1, i8 2>
+@gv4i8 = global <4 x i8> <i8 0, i8 1, i8 2, i8 3>
+@gv8i8 = global <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+@gv16i8 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+@gv2i16 = global <2 x i16> <i16 1, i16 2>
+@gv4i16 = global <4 x i16> <i16 0, i16 1, i16 2, i16 3>
+@gv8i16 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+
+@gv2i32 = global <2 x i32> <i32 0, i32 1>
+@gv4i32 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+@gv2i64 = global <2 x i64> <i64 0, i64 1>
+
+define <2 x i8> @ret_2_i8() {
+; ALL-LABEL: ret_2_i8:
+; MIPS32-DAG:   lhu $2
+; MIPS32R5-DAG: lhu $2
+
+; FIXME: why is this lh instead of lhu on mips64?
+
+; MIPS64-DAG:  lh $2
+; MIPS64R5-DAG: lh $2
+  %1 = load <2 x i8>, <2 x i8> * @gv2i8
+  ret <2 x i8> %1
+}
+
+define <4 x i8> @ret_4_i8() {
+; ALL-LABEL: ret_4_i8:
+; MIPS32-DAG:   lw $2
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG:   lw $2
+; MIPS64R5-DAG: lw $2
+
+  %1 = load <4 x i8>, <4 x i8> * @gv4i8
+  ret <4 x i8> %1
+}
+
+define <8 x i8> @ret_8_i8() {
+; ALL-LABEL: ret_8_i8:
+; MIPS32-DAG:   lw $2
+; MIPS32-DAG:   lw $3
+
+; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+  %1 = load <8 x i8>, <8 x i8> * @gv8i8
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @ret_16_i8() {
+; ALL-LABEL: ret_16_i8:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+  %1 = load <16 x i8>, <16 x i8> * @gv16i8
+  ret <16 x i8> %1
+}
+
+define <2 x i16> @ret_2_i16() {
+; ALL-LABEL: ret_2_i16:
+; MIPS32-DAG:   lw $2
+
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG:   lw $2
+
+; MIPS64R5-DAG: lw $2
+  %1 = load <2 x i16>, <2 x i16> * @gv2i16
+  ret <2 x i16> %1
+}
+
+define <4 x i16> @ret_4_i16() {
+; ALL-LABEL: ret_4_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+  %1 = load <4 x i16>, <4 x i16> * @gv4i16
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @ret_8_i16() {
+; ALL-LABEL: ret_8_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+  %1 = load <8 x i16>, <8 x i16> * @gv8i16
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @ret_2_i32() {
+; ALL-LABEL: ret_2_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG:   ld $2
+; MIPS64R5-DAG: ld $2
+
+  %1 = load <2 x i32>, <2 x i32> * @gv2i32
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @ret_4_i32() {
+; ALL-LABEL: ret_4_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+  %1 = load <4 x i32>, <4 x i32> * @gv4i32
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @ret_2_i64() {
+; ALL-LABEL: ret_2_i64:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+  %1 = load <2 x i64>, <2 x i64> * @gv2i64
+  ret <2 x i64> %1
+}
+
+@gv2f32 = global <2 x float> <float 0.0, float 0.0>
+@gv4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+
+define <2 x float> @ret_float_2() {
+entry:
+; ALL-LABEL: ret_float_2:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS64: ld $2
+
+; MIPS64R5: ld $2
+
+  %0 = load <2 x float>, <2 x float> * @gv2f32
+  ret <2 x float> %0
+}
+
+define <4 x float> @ret_float_4() {
+entry:
+; ALL-LABEL: ret_float_4:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: st.w $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+  %0 = load <4 x float>, <4 x float> * @gv4f32
+  ret <4 x float> %0
+}
+
+@gv2f64 = global <2 x double> <double 0.0, double 0.0>
+
+define <2 x double> @ret_double_2() {
+entry:
+; ALL-LABEL: ret_double_2:
+
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4)
+
+; MIPS32R5: st.d $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+  %0 = load <2 x double>, <2 x double> * @gv2f64
+  ret <2 x double> %0
+}
+
+; Test argument lowering and call result lowering.
+
+define void @call_i8_2() {
+entry:
+; ALL-LABEL: call_i8_2:
+; MIPS32EB-DAG: addiu $4
+; MIPS32EB-DAG: addiu $5
+; MIPS32-NOT: addiu $6
+; MIPS32-NOT: addiu $7
+
+; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp)
+
+; MIPS32R5: jal
+; MIPS32R5: sw $2, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}})
+; MIPS32R5-DAG: sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
+
+; MIPS64EB: daddiu $4, $zero, 1543
+; MIPS64EB: daddiu $5, $zero, 3080
+
+; MIPS64EL: daddiu $4, $zero, 1798
+; MIPS64EL: daddiu $5, $zero, 2060
+
+; MIPS64R5-DAG: lh $4
+; MIPS64R5-DAG: lh $5
+
+; MIPS32: jal i8_2
+; MIPS64: jalr $25
+
+; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16
+; MIPS32EB-DAG: sb $[[R0]]
+; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24
+; MIPS32EB-DAG: sb $[[R1]]
+
+; MIPS32EL: sb $2
+; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8
+; MIPS32EL: sb $[[R0]]
+
+; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48
+; MIPS64EB: sb $[[R4]]
+; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56
+; MIPS64EB: sb $[[R5]]
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0
+; MIPS64EL: sb $[[R6]]
+; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8
+; MIPS64EL: sb $[[R7]]
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x i8> @i8_2(<2 x i8> <i8 6, i8 7>, <2 x i8> <i8 12, i8 8>)
+  store <2 x i8> %0, <2 x i8> * @gv2i8
+  ret void
+}
+
+define void @call_i8_4() {
+entry:
+; ALL-LABEL: call_i8_4:
+; MIPS32: ori $4
+; MIPS32: ori $5
+; MIPS32-NOT: ori $6
+; MIPS32-NOT: ori $7
+
+; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp)
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5: lw $4
+; MIPS64R5: lw $5
+
+; MIPS32: jal i8_4
+; MIPS64: jalr $25
+
+; MIPS32: sw $2
+
+; MIPS32R5-DAG: sw $2
+
+; MIPS64: sw $2
+; MIPS64R5: sw $2
+
+  %0 = call <4 x i8> @i8_4(<4 x i8> <i8 6, i8 7, i8 9, i8 10>, <4 x i8> <i8 12, i8 8, i8 9, i8 10>)
+  store <4 x i8> %0, <4 x i8> * @gv4i8
+  ret void
+}
+
+define void @call_i8_8() {
+entry:
+; ALL-LABEL: call_i8_8:
+
+; MIPS32: ori $6
+; MIPS32: ori $4
+; MIPS32: move  $5
+; MIPS32: move  $7
+
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: move  $5
+; MIPS32R5-DAG: move  $7
+
+; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314
+; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314
+
+; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798
+; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060
+
+; MIPS32: jal i8_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2
+; MIPS32-DAG: sw $3
+
+; MIPS32R5-DAG: sw $2
+; MIPS32R5-DAG: sw $3
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+  %0 = call <8 x i8> @i8_8(<8 x i8> <i8 6, i8 7, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>, <8 x i8> <i8 12, i8 8, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>)
+  store <8 x i8> %0, <8 x i8> * @gv8i8
+  ret void
+}
+
+define void @calli8_16() {
+entry:
+; ALL-LABEL: calli8_16:
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: move  $5, ${{[0-9]+}}
+; MIPS32: move  $6, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $7
+
+; MIPS64R5-DAG: copy_s.d $4
+; MIPS64R5-DAG: copy_s.d $5
+; MIPS64R5-DAG: copy_s.d $6
+; MIPS64R5-DAG: copy_s.d $7
+
+; MIPS32: jal i8_16
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <16 x i8> @i8_16(<16 x i8> <i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7, i8 6, i8 7, i8 9, i8 10>, <16 x i8> <i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 12, i8 8, i8 9, i8 10>)
+  store <16 x i8> %0, <16 x i8> * @gv16i8
+  ret void
+}
+
+define void @calli16_2() {
+entry:
+; ALL-LABEL: calli16_2:
+
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+
+; MIPS32R5-DAG: lw $4
+; MIPS32R5-DAG: lw $5
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5-DAG: lw $4
+; MIPS64R5-DAG: lw $5
+
+; MIPS32: jal i16_2
+; MIPS64: jalr $25
+
+; MIPS32: sw $2, %lo(gv2i16)
+
+; MIPS32R5: sw $2, %lo(gv2i16)
+
+; MIPS64: sw $2
+
+; MIPS64R5: sw $2
+
+  %0 = call <2 x i16> @i16_2(<2 x i16> <i16 6, i16 7>, <2 x i16> <i16 12, i16 8>)
+  store <2 x i16> %0, <2 x i16> * @gv2i16
+  ret void
+}
+
+define void @calli16_4() {
+entry:
+; ALL-LABEL: calli16_4:
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+; MIPS32-DAG: ori $6
+; MIPS32-DAG: move $7
+
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: ori $5
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: move $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i16_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+  %0 = call <4 x i16> @i16_4(<4 x i16> <i16 6, i16 7, i16 9, i16 10>, <4 x i16> <i16 12, i16 8, i16 9, i16 10>)
+  store <4 x i16> %0, <4 x i16> * @gv4i16
+  ret void
+}
+
+define void @calli16_8() {
+entry:
+; ALL-LABEL: calli16_8:
+
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: move  $6, ${{[0-9]+}}
+; MIPS32-DAG: move  $7, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $7
+; MIPS64-DAG: move $5
+; MIPS64-DAG: move $6
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i16_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64: sd $3
+; MIPS64: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W2]][1], $3
+
+  %0 = call <8 x i16> @i16_8(<8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 6, i16 7, i16 9, i16 10>, <8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 12, i16 8, i16 9, i16 10>)
+  store <8 x i16> %0, <8 x i16> * @gv8i16
+  ret void
+}
+
+define void @calli32_2() {
+entry:
+; ALL-LABEL: calli32_2:
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i32_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x i32> @i32_2(<2 x i32> <i32 6, i32 7>, <2 x i32> <i32 12, i32 8>)
+  store <2 x i32> %0, <2 x i32> * @gv2i32
+  ret void
+}
+
+define void @calli32_4() {
+entry:
+; ALL-LABEL: calli32_4:
+
+; MIPS32-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: move $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i32_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <4 x i32> @i32_4(<4 x i32> <i32 6, i32 7, i32 9, i32 10>, <4 x i32> <i32 12, i32 8, i32 9, i32 10>)
+  store <4 x i32> %0, <4 x i32> * @gv4i32
+  ret void
+}
+
+define void @calli64_2() {
+entry:
+; ALL-LABEL: calli64_2:
+
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw  ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+; MIPS64: daddiu $6
+; MIPS64: daddiu $7
+
+; MIPS64R5: daddiu $4
+; MIPS64R5: daddiu $5
+; MIPS64R5: daddiu $6
+; MIPS64R5: daddiu $7
+
+; MIPS32: jal i64_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <2 x i64> @i64_2(<2 x i64> <i64 6, i64 7>, <2 x i64> <i64 12, i64 8>)
+  store <2 x i64> %0, <2 x i64> * @gv2i64
+  ret void
+}
+
+declare <2 x float> @float2_extern(<2 x float>, <2 x float>)
+declare <4 x float> @float4_extern(<4 x float>, <4 x float>)
+declare <2 x double> @double2_extern(<2 x double>, <2 x double>)
+
+define void @callfloat_2() {
+entry:
+; ALL-LABEL: callfloat_2:
+
+; MIPS32-DAG: addiu $4, $sp, 24
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: lui $7
+
+; MIPS32R5-DAG: addiu $4, $sp, 24
+; MIPS32R5-DAG: addiu $6, $zero, 0
+; MIPS32R5-DAG: lui $7
+
+; MIPS64: dsll $4
+; MIPS64: dsll $5
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+
+; MIPS32: jal float2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+  %0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
+  store <2 x float> %0, <2 x float> * @gv2f32
+  ret void
+}
+
+define void @callfloat_4() {
+entry:
+; ALL-LABEL: callfloat_4:
+
+; MIPS32: sw ${{[0-9]+}}, 36($sp)
+; MIPS32: sw ${{[0-9]+}}, 32($sp)
+; MIPS32: sw ${{[0-9]+}}, 28($sp)
+; MIPS32: sw ${{[0-9]+}}, 24($sp)
+; MIPS32: sw ${{[0-9]+}}, 20($sp)
+; MIPS32: sw ${{[0-9]+}}, 16($sp)
+; MIPS32: addiu $4, $sp, 48
+; MIPS32: addiu $6, $zero, 0
+; MIPS32: lui $7
+
+; MIPS32R5: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5: copy_s.w $7, $w{{[0-9]+}}
+; MIPS32R5: sw ${{[0-9]+}}, 36($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 32($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 16($sp)
+; MIPS32R5: addiu $4, $sp, 48
+
+; MIPS64-DAG: dsll $4
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS64: jalr $25
+; MIPS32: jal
+
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp)
+
+; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp)
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <4 x float> @float4_extern(<4 x float> <float 0.0, float -1.0, float 2.0, float 4.0>, <4 x float> <float 12.0, float 14.0, float 15.0, float 16.0>)
+  store <4 x float> %0, <4 x float> * @gv4f32
+  ret void
+}
+
+define void @calldouble_2() {
+entry:
+; ALL-LABEL: calldouble_2:
+
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]]
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: addiu $7, $zero, 0
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 36($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 32($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw  ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+; MIPS64-DAG: daddiu $4
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS32: jal double2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp)
+; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp)
+
+; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}})
+; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}})
+
+; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp)
+; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}})
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][1], $3
+
+  %0 = call <2 x double> @double2_extern(<2 x double> <double 0.0, double -1.0>, <2 x double> <double 12.0, double 14.0>)
+  store <2 x double> %0, <2 x double> * @gv2f64
+  ret void
+}
+
+; The mixed tests show that due to alignment requirements, $5 is not used
+; in argument passing.
+
+define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) {
+entry:
+; ALL-LABEL: mixed_i8:
+
+; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}}
+; MIPS32: andi $[[R7:[0-9]+]], $6, 255
+; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp)
+; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}}
+
+; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+
+; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+
+; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0
+; MIPS64R5: andi $[[R1:[0-9]+]], $[[R0]], 255
+; MIPS64R5: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5: sd $6, {{[0-9]+}}($sp)
+
+  %0 = zext i8 %b to i32
+  %1 = uitofp i32 %0 to float
+  %2 = insertelement <2 x float> undef, float %1, i32 0
+  %3 = insertelement <2 x float> %2, float %1, i32 1
+  %4 = fadd <2 x float> %3, %a
+  %5 = fadd <2 x float> %4, %c
+  %6 = extractelement <2 x float> %5, i32 0
+  %7 = extractelement <2 x float> %5, i32 1
+  %8 = fadd float %6, %7
+  ret float %8
+}
+
+define <4 x float> @mixed_32(<4 x float> %a, i32 %b) {
+entry:
+; ALL-LABEL: mixed_32:
+
+; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}}
+; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][1], $7
+; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][2], $[[R0]]
+; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][3], $[[R1]]
+; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp)
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+; MIPS64-DAG: sll	$[[R6:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4
+; MIPS64R5: insert.d $w[[W0]][1], $5
+; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]]
+
+  %0 = uitofp i32 %b to float
+  %1 = insertelement <4 x float> undef, float %0, i32 0
+  %2 = insertelement <4 x float> %1, float %0, i32 1
+  %3 = insertelement <4 x float> %2, float %0, i32 2
+  %4 = insertelement <4 x float> %3, float %0, i32 3
+  %5 = fadd <4 x float> %4, %a
+  ret <4 x float> %5
+}
+
+
+; This test is slightly more fragile than I'd like as the offset into the
+; outgoing arguments area is dependent on the size of the stack frame for
+; this function.
+
+define <4 x float> @cast(<4 x i32> %a) {
+entry:
+; ALL-LABEL: cast:
+
+; MIPS32: addiu $sp, $sp, -32
+; MIPS32-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32-DAG: sw $7, {{[0-9]+}}($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp)
+
+; MIPS32R5-DAG: insert.w  $w0[0], $6
+; MIPS32R5-DAG: insert.w  $w0[1], $7
+; MIPS32R5-DAG: lw  $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w  $w0[2], $[[R0]]
+; MIPS32R5-DAG: lw  $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w  $w0[3], $[[R1]]
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d  $w0[0], $4
+; MIPS64R5-DAG: insert.d  $w0[1], $5
+
+  %0 = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %0
+}
+
+define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) {
+entry:
+; ALL-LABEL: select:
+
+; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1
+; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1
+; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
+; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R1]], 1
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $7
+; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]]
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]]
+; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]]
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32
+; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0
+; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0
+; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1
+; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1
+
+; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1
+; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1
+
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5
+
+  %cond.t = trunc <4 x i32> %cond to <4 x i1>
+  %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2
+  ret <4 x float> %res
+}
diff --git a/test/CodeGen/Mips/ctlz-v.ll b/test/CodeGen/Mips/ctlz-v.ll
index 3d580e5771f4..156c640681b7 100644
--- a/test/CodeGen/Mips/ctlz-v.ll
+++ b/test/CodeGen/Mips/ctlz-v.ll
@@ -8,10 +8,14 @@ entry:
 ; MIPS32: clz     $2, $4
 ; MIPS32: clz     $3, $5
 
-; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
-; MIPS64-DAG: clz $2, $[[A0]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: clz $3, $[[A1]]
+; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0
+; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]]
+; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]]
+; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32
+; MIPS64-DAG: or $2, $[[R3]], $[[R1]]
 
   %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
   ret <2 x i32> %ret
diff --git a/test/CodeGen/Mips/cttz-v.ll b/test/CodeGen/Mips/cttz-v.ll
index 85f69f9a17d9..dbcde7f5fe5b 100644
--- a/test/CodeGen/Mips/cttz-v.ll
+++ b/test/CodeGen/Mips/cttz-v.ll
@@ -24,14 +24,17 @@ entry:
 ; MIPS64-DAG: and     $[[R2:[0-9]+]], $[[R1]], $[[R0]]
 ; MIPS64-DAG: clz     $[[R3:[0-9]+]], $[[R2]]
 ; MIPS64-DAG: addiu   $[[R4:[0-9]+]], $zero, 32
-; MIPS64-DAG: subu    $2, $[[R4]], $[[R3]]
-; MIPS64-DAG: sll     $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: addiu   $[[R5:[0-9]+]], $[[A1]], -1
-; MIPS64-DAG: not     $[[R6:[0-9]+]], $[[A1]]
-; MIPS64-DAG: and     $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; MIPS64-DAG: clz     $[[R8:[0-9]+]], $[[R7]]
-; MIPS64-DAG: jr      $ra
-; MIPS64-DAG: subu    $3, $[[R4]], $[[R8]]
+; MIPS64-DAG: subu    $[[R5:[0-9]+]], $[[R4]], $[[R3]]
+; MIPS64-DAG: dsrl    $[[R6:[0-9]+]], $4, 32
+; MIPS64-DAG: sll     $[[R7:[0-9]+]], $[[R6]], 0
+; MIPS64-DAG: dext    $[[R8:[0-9]+]], $[[R5]], 0, 32
+; MIPS64-DAG: addiu   $[[R9:[0-9]+]], $[[R7]], -1
+; MIPS64-DAG: not     $[[R10:[0-9]+]], $[[R7]]
+; MIPS64-DAG: and     $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; MIPS64-DAG: clz     $[[R12:[0-9]+]], $[[R11]]
+; MIPS64-DAG: subu    $[[R13:[0-9]+]], $[[R4]], $[[R12]]
+; MIPS64-DAG: dsll    $[[R14:[0-9]+]], $[[R13]], 32
+; MIPS64-DAG: or      $2, $[[R8]], $[[R14]]
 
   %ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
   ret <2 x i32> %ret
diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll
index edd6258270a0..90eb14a75b42 100644
--- a/test/CodeGen/Mips/dsp-r1.ll
+++ b/test/CodeGen/Mips/dsp-r1.ll
@@ -1172,9 +1172,19 @@ entry:
   ret { i32 } %.fca.0.insert
 }
 
+define { i32 } @test__builtin_mips_repl_ph2(i32 %i0) nounwind readnone {
+entry:
+; CHECK: repl.ph
+
+  %0 = tail call <2 x i16> @llvm.mips.repl.ph(i32 -2)
+  %1 = bitcast <2 x i16> %0 to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
 declare <2 x i16> @llvm.mips.repl.ph(i32) nounwind readnone
 
-define { i32 } @test__builtin_mips_repl_ph2(i32 %i0, i32 %a0) nounwind readnone {
+define { i32 } @test__builtin_mips_repl_ph3(i32 %i0, i32 %a0) nounwind readnone {
 entry:
 ; CHECK: replv.ph
 
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
index c155eedd62c4..d7f6308ac0b0 100644
--- a/test/CodeGen/Mips/fmadd1.ll
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -5,52 +5,63 @@
 ; IEEE 754 (1985) and IEEE 754 (2008). These instructions are therefore only
 ; available when -enable-no-nans-fp-math is given.
 
-; RUN: llc < %s -march=mipsel   -mcpu=mips32              -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32,32-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32              -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NONAN-NOMADD
 ; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R2,32R2-NONAN
-; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R6,32R6-NONAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,32R6-NOMADD,32R6-NONAN-NOMADD
 ; RUN: llc < %s -march=mips64el -mcpu=mips64   -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64,64-NONAN
 ; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R2,64R2-NONAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R6,64R6-NONAN
-; RUN: llc < %s -march=mipsel   -mcpu=mips32              | FileCheck %s -check-prefixes=ALL,32,32-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD
+; RUN: llc < %s -march=mipsel   -mcpu=mips32              | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NAN-NOMADD
 ; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            | FileCheck %s -check-prefixes=ALL,32R2,32R2-NAN
-; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            | FileCheck %s -check-prefixes=ALL,32R6,32R6-NAN
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r6            | FileCheck %s -check-prefixes=ALL,32R6-NOMADD,32R6-NAN-NOMADD
 ; RUN: llc < %s -march=mips64el -mcpu=mips64   -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64,64-NAN
 ; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R2,64R2-NAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R6,64R6-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD
+
+; Check that madd.[ds], msub.[ds], nmadd.[ds], and nmsub.[ds] are not generated
+; when the +nomadd4 attribute is specified.
+; The expected output for mips32 and mips64r6 is reused, since the
+; aforementioned instructions are not generated in those cases either.
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NONAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64   -target-abi=n64 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -enable-no-nans-fp-math -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NONAN-NOMADD
+; RUN: llc < %s -march=mipsel   -mcpu=mips32r2            -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,32-NOMADD,32-NAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64   -target-abi=n64 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -mattr=+nomadd4 | FileCheck %s -check-prefixes=ALL,64R6-NOMADD,64R6-NAN-NOMADD
 
 define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO0float:
 
-; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        add.s $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        add.s $f0, $[[T1]], $[[T2]]
 
-; 32R2:          mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2:          madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2:          add.s $f0, $[[T1]], $[[T2]]
+; 32R2:                 mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2:                 madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:                 mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:                 add.s $f0, $[[T1]], $[[T2]]
 
-; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
-; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+; 32R6-NOMADD-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-NOMADD-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG:      add.s $f0, $[[T1]], $[[T2]]
 
-; 64-DAG:        madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG:        add.s $f0, $[[T0]], $[[T1]]
+; 64-DAG:               madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG:               add.s $f0, $[[T0]], $[[T1]]
 
-; 64R2:          madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2:          add.s $f0, $[[T0]], $[[T1]]
+; 64R2:                 madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:                 mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:                 add.s $f0, $[[T0]], $[[T1]]
 
-; 64R6-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      add.s $[[T1:f[0-9]+]], $[[T0]], $f14
-; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+; 64R6-NOMADD-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      add.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      add.s $f0, $[[T1]], $[[T2]]
 
   %mul = fmul float %a, %b
   %add = fadd float %mul, %c
@@ -62,35 +73,35 @@ define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO1float:
 
-; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        add.s $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        add.s $f0, $[[T1]], $[[T2]]
 
-; 32R2:          mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2:          msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2:          add.s $f0, $[[T1]], $[[T2]]
+; 32R2:                 mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2:                 msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:                 mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:                 add.s $f0, $[[T1]], $[[T2]]
 
-; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
-; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+; 32R6-NOMADD-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-NOMADD-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG:      add.s $f0, $[[T1]], $[[T2]]
 
-; 64-DAG:        msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG:        add.s $f0, $[[T0]], $[[T1]]
+; 64-DAG:               msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG:               add.s $f0, $[[T0]], $[[T1]]
 
-; 64R2:          msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2:          add.s $f0, $[[T0]], $[[T1]]
+; 64R2:                 msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:                 mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:                 add.s $f0, $[[T0]], $[[T1]]
 
-; 64R6-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      sub.s $[[T1:f[0-9]+]], $[[T0]], $f14
-; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      add.s $f0, $[[T1]], $[[T2]]
+; 64R6-NOMADD-DAG:      mul.s $[[T0:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      sub.s $[[T1:f[0-9]+]], $[[T0]], $f14
+; 64R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      add.s $f0, $[[T1]], $[[T2]]
 
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
@@ -102,42 +113,42 @@ define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO2float:
 
-; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        sub.s $f0, $[[T2]], $[[T1]]
 
-; 32R2-NONAN:    mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NONAN:    nmadd.s $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN:           mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN:           nmadd.s $f0, $[[T0]], $f12, $f14
 
-; 32R2-NAN:      mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NAN:      madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN:      sub.s  $f0, $[[T2]], $[[T1]]
+; 32R2-NAN:             mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN:             madd.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:             mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:             sub.s  $f0, $[[T2]], $[[T1]]
 
-; 32R6-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
-; 32R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+; 32R6-NOMADD-DAG:      mtc1 $6, $[[T0:f[0-9]+]]
+; 32R6-NOMADD-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG:      sub.s $f0, $[[T2]], $[[T1]]
 
-; 64-NONAN:      nmadd.s $f0, $f14, $f12, $f13
+; 64-NONAN:             nmadd.s $f0, $f14, $f12, $f13
 
-; 64-NAN:        madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN:        sub.s  $f0, $[[T1]], $[[T0]]
+; 64-NAN:               madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN:               sub.s  $f0, $[[T1]], $[[T0]]
 
-; 64R2-NONAN:    nmadd.s $f0, $f14, $f12, $f13
+; 64R2-NONAN:           nmadd.s $f0, $f14, $f12, $f13
 
-; 64R2-NAN:      madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN:      sub.s  $f0, $[[T1]], $[[T0]]
+; 64R2-NAN:             madd.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:             mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:             sub.s  $f0, $[[T1]], $[[T0]]
 
-; 64R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      add.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      sub.s $f0, $[[T2]], $[[T1]]
 
   %mul = fmul float %a, %b
   %add = fadd float %mul, %c
@@ -149,34 +160,34 @@ define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO3float:
 
-; 32-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
-; 32-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        sub.s $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG:        mtc1 $6, $[[T0:f[0-9]+]]
+; 32-NOMADD-DAG:        mul.s $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        sub.s $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        sub.s $f0, $[[T2]], $[[T1]]
 
-; 32R2-NONAN:    mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NONAN:    nmsub.s $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN:           mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NONAN:           nmsub.s $f0, $[[T0]], $f12, $f14
 
-; 32R2-NAN:      mtc1 $6, $[[T0:f[0-9]+]]
-; 32R2-NAN:      msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN:      sub.s  $f0, $[[T2]], $[[T1]]
+; 32R2-NAN:             mtc1 $6, $[[T0:f[0-9]+]]
+; 32R2-NAN:             msub.s $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:             mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:             sub.s  $f0, $[[T2]], $[[T1]]
 
-; 64-NAN:        msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN:        sub.s  $f0, $[[T1]], $[[T0]]
+; 64-NAN:               msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN:               sub.s  $f0, $[[T1]], $[[T0]]
 
-; 64-NONAN:      nmsub.s $f0, $f14, $f12, $f13
+; 64-NONAN:             nmsub.s $f0, $f14, $f12, $f13
 
-; 64R2-NAN:      msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN:      sub.s  $f0, $[[T1]], $[[T0]]
+; 64R2-NAN:             msub.s $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:             mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:             sub.s  $f0, $[[T1]], $[[T0]]
 
-; 64R6-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      sub.s $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG:      mul.s $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      sub.s $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      sub.s $f0, $[[T2]], $[[T1]]
 
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
@@ -188,36 +199,36 @@ define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO10double:
 
-; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        add.d $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        add.d $f0, $[[T1]], $[[T2]]
 
-; 32R2:          ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2:          madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2:          mthc1 $zero, $[[T2]]
-; 32R2:          add.d $f0, $[[T1]], $[[T2]]
+; 32R2:                 ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2:                 madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:                 mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:                 mthc1 $zero, $[[T2]]
+; 32R2:                 add.d $f0, $[[T1]], $[[T2]]
 
-; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+; 32R6-NOMADD-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG:      add.d $f0, $[[T1]], $[[T2]]
 
-; 64-DAG:        madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG:        add.d $f0, $[[T0]], $[[T1]]
+; 64-DAG:               madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG:               add.d $f0, $[[T0]], $[[T1]]
 
-; 64R2:          madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2:          add.d $f0, $[[T0]], $[[T1]]
+; 64R2:                 madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:                 mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:                 add.d $f0, $[[T0]], $[[T1]]
 
-; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+; 64R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      add.d $f0, $[[T1]], $[[T2]]
 
   %mul = fmul double %a, %b
   %add = fadd double %mul, %c
@@ -229,36 +240,36 @@ define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO11double:
 
-; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        add.d $f0, $[[T1]], $[[T2]]
+; 32-NOMADD-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        add.d $f0, $[[T1]], $[[T2]]
 
-; 32R2:          ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2:          msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2:          mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2:          mthc1 $zero, $[[T2]]
-; 32R2:          add.d $f0, $[[T1]], $[[T2]]
+; 32R2:                 ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2:                 msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2:                 mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2:                 mthc1 $zero, $[[T2]]
+; 32R2:                 add.d $f0, $[[T1]], $[[T2]]
 
-; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+; 32R6-NOMADD-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG:      add.d $f0, $[[T1]], $[[T2]]
 
-; 64-DAG:        msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-DAG:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-DAG:        add.d $f0, $[[T0]], $[[T1]]
+; 64-DAG:               msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-DAG:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-DAG:               add.d $f0, $[[T0]], $[[T1]]
 
-; 64R2:          msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2:          mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2:          add.d $f0, $[[T0]], $[[T1]]
+; 64R2:                 msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2:                 mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2:                 add.d $f0, $[[T0]], $[[T1]]
 
-; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      add.d $f0, $[[T1]], $[[T2]]
+; 64R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      add.d $f0, $[[T1]], $[[T2]]
 
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
@@ -270,43 +281,43 @@ define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO12double:
 
-; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        sub.d $f0, $[[T2]], $[[T1]]
 
-; 32R2-NONAN:    ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NONAN:    nmadd.d $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN:           ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN:           nmadd.d $f0, $[[T0]], $f12, $f14
 
-; 32R2-NAN:      ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NAN:      madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN:      mthc1 $zero, $[[T2]]
-; 32R2-NAN:      sub.d $f0, $[[T2]], $[[T1]]
+; 32R2-NAN:             ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN:             madd.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:             mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:             mthc1 $zero, $[[T2]]
+; 32R2-NAN:             sub.d $f0, $[[T2]], $[[T1]]
 
-; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+; 32R6-NOMADD-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG:      sub.d $f0, $[[T2]], $[[T1]]
 
-; 64-NONAN:      nmadd.d $f0, $f14, $f12, $f13
+; 64-NONAN:             nmadd.d $f0, $f14, $f12, $f13
 
-; 64-NAN:        madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN:        sub.d $f0, $[[T1]], $[[T0]]
+; 64-NAN:               madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN:               sub.d $f0, $[[T1]], $[[T0]]
 
-; 64R2-NONAN:    nmadd.d $f0, $f14, $f12, $f13
+; 64R2-NONAN:           nmadd.d $f0, $f14, $f12, $f13
 
-; 64R2-NAN:      madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN:      sub.d $f0, $[[T1]], $[[T0]]
+; 64R2-NAN:             madd.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:             mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:             sub.d $f0, $[[T1]], $[[T0]]
 
-; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      add.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      sub.d $f0, $[[T2]], $[[T1]]
 
   %mul = fmul double %a, %b
   %add = fadd double %mul, %c
@@ -318,43 +329,43 @@ define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
 entry:
 ; ALL-LABEL: FOO13double:
 
-; 32-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
-; 32-DAG:        sub.d $f0, $[[T2]], $[[T1]]
+; 32-NOMADD-DAG:        ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32-NOMADD-DAG:        mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32-NOMADD-DAG:        sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32-NOMADD-DAG:        mtc1 $zero, $[[T2:f[0-9]+]]
+; 32-NOMADD-DAG:        sub.d $f0, $[[T2]], $[[T1]]
 
-; 32R2-NONAN:    ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NONAN:    nmsub.d $f0, $[[T0]], $f12, $f14
+; 32R2-NONAN:           ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NONAN:           nmsub.d $f0, $[[T0]], $f12, $f14
 
-; 32R2-NAN:      ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R2-NAN:      msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
-; 32R2-NAN:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R2-NAN:      mthc1 $zero, $[[T2]]
-; 32R2-NAN:      sub.d $f0, $[[T2]], $[[T1]]
+; 32R2-NAN:             ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R2-NAN:             msub.d $[[T1:f[0-9]+]], $[[T0]], $f12, $f14
+; 32R2-NAN:             mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R2-NAN:             mthc1 $zero, $[[T2]]
+; 32R2-NAN:             sub.d $f0, $[[T2]], $[[T1]]
 
-; 32R6-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
-; 32R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
-; 32R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
-; 32R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+; 32R6-NOMADD-DAG:      ldc1 $[[T0:f[0-9]+]], 16($sp)
+; 32R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f14
+; 32R6-NOMADD-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $[[T0]]
+; 32R6-NOMADD-DAG:      mtc1 $zero, $[[T2:f[0-9]+]]
+; 32R6-NOMADD-DAG:      sub.d $f0, $[[T2]], $[[T1]]
 
-; 64-NONAN:      nmsub.d $f0, $f14, $f12, $f13
+; 64-NONAN:             nmsub.d $f0, $f14, $f12, $f13
 
-; 64-NAN:        msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64-NAN:        mtc1 $zero, $[[T1:f[0-9]+]]
-; 64-NAN:        sub.d $f0, $[[T1]], $[[T0]]
+; 64-NAN:               msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64-NAN:               mtc1 $zero, $[[T1:f[0-9]+]]
+; 64-NAN:               sub.d $f0, $[[T1]], $[[T0]]
 
-; 64R2-NONAN:    nmsub.d $f0, $f14, $f12, $f13
+; 64R2-NONAN:           nmsub.d $f0, $f14, $f12, $f13
 
-; 64R2-NAN:      msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
-; 64R2-NAN:      mtc1 $zero, $[[T1:f[0-9]+]]
-; 64R2-NAN:      sub.d $f0, $[[T1]], $[[T0]]
+; 64R2-NAN:             msub.d $[[T0:f[0-9]+]], $f14, $f12, $f13
+; 64R2-NAN:             mtc1 $zero, $[[T1:f[0-9]+]]
+; 64R2-NAN:             sub.d $f0, $[[T1]], $[[T0]]
 
-; 64R6-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
-; 64R6-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
-; 64R6-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
-; 64R6-DAG:      sub.d $f0, $[[T2]], $[[T1]]
+; 64R6-NOMADD-DAG:      mul.d $[[T1:f[0-9]+]], $f12, $f13
+; 64R6-NOMADD-DAG:      sub.d $[[T2:f[0-9]+]], $[[T1]], $f14
+; 64R6-NOMADD-DAG:      dmtc1 $zero, $[[T2:f[0-9]+]]
+; 64R6-NOMADD-DAG:      sub.d $f0, $[[T2]], $[[T1]]
 
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
diff --git a/test/CodeGen/Mips/llvm-ir/mul.ll b/test/CodeGen/Mips/llvm-ir/mul.ll
index 20853073dfa6..1562372ce9a0 100644
--- a/test/CodeGen/Mips/llvm-ir/mul.ll
+++ b/test/CodeGen/Mips/llvm-ir/mul.ll
@@ -268,7 +268,7 @@ entry:
   ; MM64R6:         daddu   $2, $[[T1]], $[[T0]]
   ; MM64R6-DAG:     dmul    $3, $5, $7
 
-  ; MM32:           lw      $25, %call16(__multi3)($gp)
+  ; MM32:           lw      $25, %call16(__multi3)($16)
 
   %r = mul i128 %a, %b
   ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/sdiv.ll b/test/CodeGen/Mips/llvm-ir/sdiv.ll
index ee2b212a9f2f..defd25bb41ac 100644
--- a/test/CodeGen/Mips/llvm-ir/sdiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll
@@ -172,7 +172,7 @@ entry:
   ; 64R6:         ddiv    $2, $4, $5
   ; 64R6:         teq     $5, $zero, 7
 
-  ; MM32:         lw      $25, %call16(__divdi3)($gp)
+  ; MM32:         lw      $25, %call16(__divdi3)($2)
 
   ; MM64:         ddiv    $2, $4, $5
   ; MM64:         teq     $5, $zero, 7
@@ -184,7 +184,15 @@ entry:
 define signext i128 @sdiv_i128(i128 signext %a, i128 signext %b) {
 entry:
   ; ALL-LABEL: sdiv_i128:
-  ; ALL:         l{{w|d}}      $25, %call16(__divti3)($gp)
+
+  ; GP32:         lw      $25, %call16(__divti3)($gp)
+
+  ; GP64-NOT-R6:  ld      $25, %call16(__divti3)($gp)
+  ; 64R6:         ld      $25, %call16(__divti3)($gp)
+
+  ; MM32:         lw      $25, %call16(__divti3)($16)
+
+  ; MM64:         ld      $25, %call16(__divti3)($2)
 
   %r = sdiv i128 %a, %b
   ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/srem.ll b/test/CodeGen/Mips/llvm-ir/srem.ll
index 812c10566979..42664d7457e5 100644
--- a/test/CodeGen/Mips/llvm-ir/srem.ll
+++ b/test/CodeGen/Mips/llvm-ir/srem.ll
@@ -164,7 +164,7 @@ entry:
   ; 64R6:         dmod    $2, $4, $5
   ; 64R6:         teq     $5, $zero, 7
 
-  ; MM32:         lw      $25, %call16(__moddi3)($gp)
+  ; MM32:         lw      $25, %call16(__moddi3)($2)
 
   ; MM64:         dmod    $2, $4, $5
   ; MM64:         teq     $5, $zero, 7
@@ -177,7 +177,14 @@ define signext i128 @srem_i128(i128 signext %a, i128 signext %b) {
 entry:
 ; ALL-LABEL: srem_i128:
 
-  ; ALL:         l{{w|d}}      $25, %call16(__modti3)($gp)
+  ; GP32:         lw      $25, %call16(__modti3)($gp)
+
+  ; GP64-NOT-R6:  ld      $25, %call16(__modti3)($gp)
+  ; 64R6:         ld      $25, %call16(__modti3)($gp)
+
+  ; MM32:         lw      $25, %call16(__modti3)($16)
+
+  ; MM64:         ld      $25, %call16(__modti3)($2)
 
   %r = srem i128 %a, %b
   ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/udiv.ll b/test/CodeGen/Mips/llvm-ir/udiv.ll
index 6e078fdedfca..78ab36442a9a 100644
--- a/test/CodeGen/Mips/llvm-ir/udiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/udiv.ll
@@ -134,7 +134,7 @@ entry:
   ; 64R6:         ddivu   $2, $4, $5
   ; 64R6:         teq     $5, $zero, 7
 
-  ; MM32:         lw      $25, %call16(__udivdi3)($gp)
+  ; MM32:         lw      $25, %call16(__udivdi3)($2)
 
   ; MM64:         ddivu   $2, $4, $5
   ; MM64:         teq     $5, $zero, 7
@@ -147,7 +147,14 @@ define signext i128 @udiv_i128(i128 signext %a, i128 signext %b) {
 entry:
 ; ALL-LABEL: udiv_i128:
 
-  ; ALL:         l{{w|d}}      $25, %call16(__udivti3)($gp)
+  ; GP32:         lw      $25, %call16(__udivti3)($gp)
+
+  ; GP64-NOT-R6:  ld      $25, %call16(__udivti3)($gp)
+  ; 64R6:         ld      $25, %call16(__udivti3)($gp)
+
+  ; MM32:         lw      $25, %call16(__udivti3)($16)
+
+  ; MM64:         ld      $25, %call16(__udivti3)($2)
 
   %r = udiv i128 %a, %b
   ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/urem.ll b/test/CodeGen/Mips/llvm-ir/urem.ll
index 3bc82ceecd2a..160c126c7e3a 100644
--- a/test/CodeGen/Mips/llvm-ir/urem.ll
+++ b/test/CodeGen/Mips/llvm-ir/urem.ll
@@ -190,7 +190,7 @@ entry:
   ; 64R6:         dmodu   $2, $4, $5
   ; 64R6:         teq     $5, $zero, 7
 
-  ; MM32:         lw      $25, %call16(__umoddi3)($gp)
+  ; MM32:         lw      $25, %call16(__umoddi3)($2)
 
   ; MM64:         dmodu   $2, $4, $5
   ; MM64:         teq     $5, $zero, 7
@@ -208,9 +208,9 @@ entry:
   ; GP64-NOT-R6:  ld      $25, %call16(__umodti3)($gp)
   ; 64R6:         ld      $25, %call16(__umodti3)($gp)
 
-  ; MM32:         lw      $25, %call16(__umodti3)($gp)
+  ; MM32:         lw      $25, %call16(__umodti3)($16)
 
-  ; MM64:         ld      $25, %call16(__umodti3)($gp)
+  ; MM64:         ld      $25, %call16(__umodti3)($2)
 
     %r = urem i128 %a, %b
     ret i128 %r
diff --git a/test/CodeGen/Mips/micromips-gp-rc.ll b/test/CodeGen/Mips/micromips-gp-rc.ll
index 16e55c357db6..f139f7a8486d 100644
--- a/test/CodeGen/Mips/micromips-gp-rc.ll
+++ b/test/CodeGen/Mips/micromips-gp-rc.ll
@@ -14,5 +14,5 @@ entry:
 ; Function Attrs: noreturn
 declare void @exit(i32 signext)
 
-; CHECK: addu $gp, ${{[0-9]+}}
+; CHECK: move $gp, ${{[0-9]+}}
 
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index 6fa506849ee6..564ffdd2f691 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,9 +1,9 @@
-; RUN: llc  < %s -march=mips64el -mcpu=mips4 -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips4 -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc  < %s -march=mips64el -mcpu=mips64 -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc  < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n32 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc  < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n64 -relocation-model=pic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips4 -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc  < %s -march=mips64el -mcpu=mips64 -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n32 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc  < %s -march=mipsel -mcpu=mips64r6 -mattr=+micromips -target-abi n64 -relocation-model=pic | FileCheck %s -check-prefix=CHECK-N64
 
 @f0 = common global float 0.000000e+00, align 4
 @d0 = common global double 0.000000e+00, align 8
diff --git a/test/CodeGen/Mips/pbqp-reserved-physreg.ll b/test/CodeGen/Mips/pbqp-reserved-physreg.ll
new file mode 100644
index 000000000000..eedc51bd1e57
--- /dev/null
+++ b/test/CodeGen/Mips/pbqp-reserved-physreg.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=mips -regalloc=pbqp <%s > %t
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+
+; Function Attrs: nounwind
+define void @ham.928() local_unnamed_addr #0 align 2 {
+bb:
+  switch i32 undef, label %bb35 [
+    i32 1, label %bb18
+    i32 0, label %bb19
+    i32 3, label %bb20
+    i32 2, label %bb21
+    i32 4, label %bb17
+  ]
+
+bb17:                                             ; preds = %bb
+  unreachable
+
+bb18:                                             ; preds = %bb
+  unreachable
+
+bb19:                                             ; preds = %bb
+  unreachable
+
+bb20:                                             ; preds = %bb
+  unreachable
+
+bb21:                                             ; preds = %bb
+  unreachable
+
+bb35:                                             ; preds = %bb
+  unreachable
+}
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/Mips/return-vector.ll b/test/CodeGen/Mips/return-vector.ll
index 08eddf370096..c59695d18734 100644
--- a/test/CodeGen/Mips/return-vector.ll
+++ b/test/CodeGen/Mips/return-vector.ll
@@ -128,8 +128,11 @@ entry:
 
 ; CHECK-LABEL:        call_f2:
 ; CHECK:        call16(f2)
-; CHECK-NOT:    lwc1
-; CHECK:        add.s    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    lwc1 $f[[F0:[0-9]]], [[O0]]($sp)
+; CHECK-DAG:    lwc1 $f[[F1:[0-9]]], 20($sp)
+; CHECK:        add.s    $f0, $f[[F0]], $f[[F1]]
+
 }
 
 
@@ -143,12 +146,13 @@ entry:
 
 ; CHECK-LABEL:        call_d2:
 ; CHECK:        call16(d2)
-; CHECK-NOT:    ldc1
-; CHECK:        add.d    $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK:        addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]+]], 24($sp)
+; CHECK-DAG:    ldc1 $f[[F1:[0-9]+]], [[O0]]($sp)
+; CHECK:        add.d    $f0, $f[[F1]], $f[[F0]]
+
 }
 
-
-
 ; Check that function returns vector on stack in cases when vector can't be
 ; returned in registers. Also check that vector is placed on stack starting
 ; from the address in register $4.
@@ -179,11 +183,12 @@ entry:
   ret <4 x float> %vecins4
 
 ; CHECK-LABEL:        return_f4:
-; CHECK-DAG:    lwc1    $[[R0:[a-z0-9]+]], 16($sp)
-; CHECK-DAG:    swc1    $[[R0]], 12($4)
+; CHECK-DAG:    lwc1    $f[[R0:[0-9]+]], 16($sp)
+; CHECK-DAG:    swc1    $f[[R0]], 12($4)
 ; CHECK-DAG:    sw      $7, 8($4)
 ; CHECK-DAG:    sw      $6, 4($4)
 ; CHECK-DAG:    sw      $5, 0($4)
+
 }
 
 
@@ -227,8 +232,8 @@ entry:
   ret <2 x float> %vecins2
 
 ; CHECK-LABEL:        return_f2:
-; CHECK:        mov.s   $f0, $f12
-; CHECK:        mov.s   $f2, $f14
+; CHECK-DAG:    sw   $5, 0($4)
+; CHECK-DAG:    sw   $6, 4($4)
 }
 
 
@@ -239,6 +244,10 @@ entry:
   ret <2 x double> %vecins2
 
 ; CHECK-LABEL:        return_d2:
-; CHECK:        mov.d   $f0, $f12
-; CHECK:        mov.d   $f2, $f14
+; CHECK-DAG:    ldc1 $f[[F0:[0-9]]], 16($sp)
+; CHECK-DAG:    sdc1 $f[[F0]], 8($4)
+; CHECK-DAG:    mtc1 $6, $f[[F1:[0-9]+]]
+; CHECK-DAG:    mtc1 $7, $f
+; CHECK-DAG:    sdc1 $f[[F0]], 0($4)
+
 }
diff --git a/test/CodeGen/Mips/tailcall/tailcall.ll b/test/CodeGen/Mips/tailcall/tailcall.ll
index 01a9b64ba63c..3f04e1cf3053 100644
--- a/test/CodeGen/Mips/tailcall/tailcall.ll
+++ b/test/CodeGen/Mips/tailcall/tailcall.ll
@@ -176,7 +176,7 @@ entry:
 ; ALL-LABEL: caller8_1:
 ; PIC32: jalr $25
 ; PIC32R6: jalr $25
-; PIC32MM: jalr{{.*}} $25
+; PIC32MM: jalr $25
 ; STATIC32: jal
 ; PIC64: jalr $25
 ; STATIC64: jal
@@ -288,7 +288,7 @@ entry:
 ; ALL-LABEL: caller13:
 ; PIC32: jalr $25
 ; PIC32R6: jalr $25
-; PIC32MM: jalr{{.*}} $25
+; PIC32MM: jalr $25
 ; STATIC32: jal
 ; STATIC64: jal
 ; PIC64R6: jalr $25
diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll b/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
new file mode 100644
index 000000000000..14669b9005b7
--- /dev/null
+++ b/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=32442
+; Don't generate zero extension for the return value.
+; CHECK-NOT: clrldi
+
+define zeroext i1 @foo(i32 signext %i, i32* %p) {
+entry:
+  %cmp = icmp eq i32 %i, 0
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  store i32 %i, i32* %p, align 4
+  br label %return
+
+return:
+  %retval = phi i1 [ true, %if.end ], [ false, %entry ]
+  ret i1 %retval
+}
diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest.ll b/test/CodeGen/PowerPC/BoolRetToIntTest.ll
index 4a0966b2859f..fd515281e394 100644
--- a/test/CodeGen/PowerPC/BoolRetToIntTest.ll
+++ b/test/CodeGen/PowerPC/BoolRetToIntTest.ll
@@ -31,14 +31,14 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %cleanup.dest.slot.0
 }
@@ -78,14 +78,14 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: call void %cont(i1 [[REG]]
   tail call void %cont(i1 %cleanup.dest.slot.0)
   ret void
@@ -112,17 +112,17 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: call void %cont(i1 [[REG]]
   tail call void %cont(i1 %cleanup.dest.slot.0)
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %cleanup.dest.slot.0
 }
@@ -136,7 +136,7 @@ foo:
   br label %cleanup
 
 cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   %result = phi i1 [ false, %foo ], [ %operand, %entry ]
   ret i1 %result
@@ -186,7 +186,7 @@ foo:
 
 ; CHECK-LABEL: cleanup
 cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   %result = phi i1 [ %bar, %foo], [ %operand, %entry ]
   ret i1 %result
@@ -198,8 +198,8 @@ declare zeroext i1 @return_i1()
 define zeroext i1 @call_test() {
 ; CHECK: [[REG:%.+]] = call i1
   %result = call i1 @return_i1()
-; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i32
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i64
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %result
 }
diff --git a/test/CodeGen/PowerPC/crbits.ll b/test/CodeGen/PowerPC/crbits.ll
index a85237195c5e..4ae91d1163a4 100644
--- a/test/CodeGen/PowerPC/crbits.ll
+++ b/test/CodeGen/PowerPC/crbits.ll
@@ -94,13 +94,15 @@ entry:
   ret i1 %or7
 
 ; CHECK-LABEL: @test5
+; CHECK-DAG: li [[NEG2:[0-9]+]], -2
 ; CHECK-DAG: and [[REG1:[0-9]+]], 3, 4
-; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2
-; CHECK-DAG: li [[REG3:[0-9]+]], 1
-; CHECK-DAG: andi. {{[0-9]+}}, [[REG1]], 1
-; CHECK-DAG: crandc [[REG5:[0-9]+]],
-; CHECK: isel 3, 0, [[REG3]], [[REG5]]
-; CHECK: blr
+; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]]
+; CHECK-DAG: clrldi [[TRUNC:[0-9]+]], [[REG1]], 63
+; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]]
+; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5
+; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1
+; CHECK: or 3, [[TRUNC]], [[NE4]]
+; CHECK-NEXT: blr
 }
 
 ; Function Attrs: nounwind readnone
@@ -112,15 +114,16 @@ entry:
   ret i1 %and7
 
 ; CHECK-LABEL: @test6
-; CHECK-DAG: andi. {{[0-9]+}}, 3, 1
-; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2
-; CHECK-DAG: crmove [[REG1:[0-9]+]], 1
-; CHECK-DAG: andi. {{[0-9]+}}, 4, 1
-; CHECK-DAG: li [[REG2:[0-9]+]], 1
-; CHECK-DAG: crorc [[REG4:[0-9]+]], 1,
-; CHECK-DAG: crnand [[REG5:[0-9]+]], [[REG4]], [[REG1]]
-; CHECK: isel 3, 0, [[REG2]], [[REG5]]
-; CHECK: blr
+; CHECK-DAG: li [[NEG2:[0-9]+]], -2
+; CHECK-DAG: clrldi [[CLR1:[0-9]+]], 4, 63
+; CHECK-DAG: clrldi [[CLR2:[0-9]+]], 3, 63
+; CHECK-DAG: xor [[NE1:[0-9]+]], 5, [[NEG2]]
+; CHECK-DAG: cntlzw [[NE2:[0-9]+]], [[NE1]]
+; CHECK: srwi [[NE3:[0-9]+]], [[NE2]], 5
+; CHECK: xori [[NE4:[0-9]+]], [[NE3]], 1
+; CHECK: or [[OR:[0-9]+]], [[NE4]], [[CLR1]]
+; CHECK: and 3, [[OR]], [[CLR2]]
+; CHECK-NEXT: blr
 }
 
 ; Function Attrs: nounwind readnone
@@ -187,12 +190,13 @@ entry:
   ret i32 %and
 
 ; CHECK-LABEL: @test10
-; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0
-; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0
-; CHECK-DAG: li [[REG2:[0-9]+]], 1
-; CHECK-DAG: crorc [[REG3:[0-9]+]],
-; CHECK: isel 3, 0, [[REG2]], [[REG3]]
-; CHECK: blr
+; CHECK-DAG: cntlzw 3, 3
+; CHECK-DAG: cntlzw 4, 4
+; CHECK-DAG: srwi 3, 3, 5
+; CHECK-DAG: srwi 4, 4, 5
+; CHECK: xori 3, 3, 1
+; CHECK: and 3, 3, 4
+; CHECK-NEXT: blr
 }
 
 attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/PowerPC/logic-ops-on-compares.ll b/test/CodeGen/PowerPC/logic-ops-on-compares.ll
index df021c20ea86..5a507e9ff678 100644
--- a/test/CodeGen/PowerPC/logic-ops-on-compares.ll
+++ b/test/CodeGen/PowerPC/logic-ops-on-compares.ll
@@ -40,8 +40,8 @@ return:                                           ; preds = %if.end, %if.then
   ret i32 %retval.0
 }
 
-define void @neg_truncate_i32(i32 *%ptr) {
-; CHECK-LABEL: neg_truncate_i32:
+define void @neg_truncate_i32_eq(i32 *%ptr) {
+; CHECK-LABEL: neg_truncate_i32_eq:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    lwz r3, 0(r3)
 ; CHECK-NEXT:    rldicl. r3, r3, 0, 63
@@ -66,8 +66,8 @@ if.end29:                                         ; preds = %if.else
 }
 
 ; Function Attrs: nounwind
-define i64 @logic_ne_64(i64 %a, i64 %b, i64 %c) {
-; CHECK-LABEL: logic_ne_64:
+define i64 @logic_eq_64(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: logic_eq_64:
 ; CHECK:    xor r7, r3, r4
 ; CHECK-NEXT:    li r6, 55
 ; CHECK-NEXT:    xor r5, r5, r6
@@ -99,8 +99,8 @@ return:                                           ; preds = %if.end, %if.then
   ret i64 %retval.0
 }
 
-define void @neg_truncate_i64(i64 *%ptr) {
-; CHECK-LABEL: neg_truncate_i64:
+define void @neg_truncate_i64_eq(i64 *%ptr) {
+; CHECK-LABEL: neg_truncate_i64_eq:
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    rldicl. r3, r3, 0, 63
@@ -124,6 +124,67 @@ if.end29:                                         ; preds = %if.else
 
 }
 
+; Function Attrs: nounwind
+define i64 @logic_ne_64(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: logic_ne_64:
+; CHECK:    xor r7, r3, r4
+; CHECK-NEXT:    li r6, 55
+; CHECK-NEXT:    addic r8, r7, -1
+; CHECK-NEXT:    xor r5, r5, r6
+; CHECK-NEXT:    subfe r7, r8, r7
+; CHECK-NEXT:    cntlzd r5, r5
+; CHECK-NEXT:    addic r12, r4, -1
+; CHECK-NEXT:    rldicl r5, r5, 58, 63
+; CHECK-NEXT:    subfe r6, r12, r4
+; CHECK-NEXT:    and r6, r7, r6
+; CHECK-NEXT:    or. r5, r6, r5
+; CHECK-NEXT:    bc 4, 1
+entry:
+  %tobool = icmp ne i64 %a, %b
+  %tobool1 = icmp ne i64 %b, 0
+  %or.cond = and i1 %tobool, %tobool1
+  %tobool3 = icmp eq i64 %c, 55
+  %or.cond5 = or i1 %or.cond, %tobool3
+  br i1 %or.cond5, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call i64 @foo64(i64 %a) #2
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %call4 = tail call i64 @bar64(i64 %b) #2
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i64 [ %call4, %if.end ], [ %call, %if.then ]
+  ret i64 %retval.0
+}
+
+define void @neg_truncate_i64_ne(i64 *%ptr) {
+; CHECK-LABEL: neg_truncate_i64_ne:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    andi. r3, r3, 1
+; CHECK-NEXT:    bclr 12, 1, 0
+; CHECK-NEXT:  # BB#1: # %if.end29.thread136
+; CHECK-NEXT:  .LBB5_2: # %if.end29
+entry:
+  %0 = load i64, i64* %ptr, align 4
+  %rem17127 = and i64 %0, 1
+  %cmp18 = icmp ne i64 %rem17127, 0
+  br label %if.else
+
+if.else:                                          ; preds = %entry
+  br i1 %cmp18, label %if.end29, label %if.end29.thread136
+
+if.end29.thread136:                               ; preds = %if.else
+  unreachable
+
+if.end29:                                         ; preds = %if.else
+  ret void
+
+}
+
 declare signext i32 @foo(i32 signext)
 declare signext i32 @bar(i32 signext)
 declare i64 @foo64(i64)
diff --git a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
index 3095429758f6..ad9078c82066 100644
--- a/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-unknown-linux-gnu"
@@ -11,111 +12,237 @@ target triple = "powerpc64le-unknown-linux-gnu"
 @zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4
 @zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4
 
-; Function Attrs: nounwind readonly
 declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1
 
-; Validate with if(memcmp())
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest01() local_unnamed_addr #0 {
-entry:
-  %call = tail call signext i32 @memcmp(i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer1 to i8*), i8* bitcast ([3 x i32]* @zeroEqualityTest01.buffer2 to i8*), i64 16)
-  %not.tobool = icmp ne i32 %call, 0
-  %. = zext i1 %not.tobool to i32
-  ret i32 %.
-
-  ; CHECK-LABEL: @zeroEqualityTest01
-  ; CHECK-LABEL: %res_block
-  ; CHECK: li 3, 1
-  ; CHECK-NEXT: clrldi
-  ; CHECK-NEXT: blr
-  ; CHECK: li 3, 0
-  ; CHECK-NEXT: clrldi
-  ; CHECK-NEXT: blr
-}
-
-; Validate with if(memcmp() == 0)
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest02() local_unnamed_addr #0 {
-entry:
-  %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
+; Check 4 bytes - requires 1 load for each param.
+define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
+; CHECK-LABEL: zeroEqualityTest02:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    lwz 3, 0(3)
+; CHECK-NEXT:    lwz 4, 0(4)
+; CHECK-NEXT:    xor 3, 3, 4
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
+; CHECK-NEXT:    blr
+  %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 4)
   %not.cmp = icmp ne i32 %call, 0
   %. = zext i1 %not.cmp to i32
   ret i32 %.
+}
 
-  ; CHECK-LABEL: @zeroEqualityTest02
-  ; CHECK-LABEL: %res_block
-  ; CHECK: li 3, 1
-  ; CHECK-NEXT: clrldi
-  ; CHECK-NEXT: blr
-  ; CHECK: li 3, 0
-  ; CHECK-NEXT: clrldi
-  ; CHECK-NEXT: blr
+; Check 16 bytes - requires 2 loads for each param (or use vectors?).
+define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
+; CHECK-LABEL: zeroEqualityTest01:
+; CHECK:       # BB#0: # %loadbb
+; CHECK-NEXT:    ld 5, 0(3)
+; CHECK-NEXT:    ld 6, 0(4)
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne 0, .LBB1_2
+; CHECK-NEXT:  # BB#1: # %loadbb1
+; CHECK-NEXT:    ld 3, 8(3)
+; CHECK-NEXT:    ld 4, 8(4)
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    beq 0, .LBB1_3
+; CHECK-NEXT:  .LBB1_2: # %res_block
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB1_3: # %endblock
+; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    blr
+  %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16)
+  %not.tobool = icmp ne i32 %call, 0
+  %. = zext i1 %not.tobool to i32
+  ret i32 %.
+}
+
+; Check 7 bytes - requires 3 loads for each param.
+define signext i32 @zeroEqualityTest03(i8* %x, i8* %y) {
+; CHECK-LABEL: zeroEqualityTest03:
+; CHECK:       # BB#0: # %loadbb
+; CHECK-NEXT:    lwz 5, 0(3)
+; CHECK-NEXT:    lwz 6, 0(4)
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne 0, .LBB2_3
+; CHECK-NEXT:  # BB#1: # %loadbb1
+; CHECK-NEXT:    lhz 5, 4(3)
+; CHECK-NEXT:    lhz 6, 4(4)
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne 0, .LBB2_3
+; CHECK-NEXT:  # BB#2: # %loadbb2
+; CHECK-NEXT:    lbz 3, 6(3)
+; CHECK-NEXT:    lbz 4, 6(4)
+; CHECK-NEXT:    cmplw 3, 4
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    beq 0, .LBB2_4
+; CHECK-NEXT:  .LBB2_3: # %res_block
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB2_4: # %endblock
+; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    blr
+  %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7)
+  %not.lnot = icmp ne i32 %call, 0
+  %cond = zext i1 %not.lnot to i32
+  ret i32 %cond
 }
 
 ; Validate with > 0
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest03() local_unnamed_addr #0 {
-entry:
+define signext i32 @zeroEqualityTest04() {
+; CHECK-LABEL: zeroEqualityTest04:
+; CHECK:       # BB#0: # %loadbb
+; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha
+; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha
+; CHECK-NEXT:    addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l
+; CHECK-NEXT:    addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l
+; CHECK-NEXT:    ldbrx 3, 0, 6
+; CHECK-NEXT:    ldbrx 4, 0, 5
+; CHECK-NEXT:    subf. 7, 4, 3
+; CHECK-NEXT:    bne 0, .LBB3_2
+; CHECK-NEXT:  # BB#1: # %loadbb1
+; CHECK-NEXT:    li 4, 8
+; CHECK-NEXT:    ldbrx 3, 6, 4
+; CHECK-NEXT:    ldbrx 4, 5, 4
+; CHECK-NEXT:    subf. 5, 4, 3
+; CHECK-NEXT:    beq 0, .LBB3_4
+; CHECK-NEXT:  .LBB3_2: # %res_block
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    li 12, -1
+; CHECK-NEXT:    isel 3, 12, 3, 0
+; CHECK-NEXT:  .LBB3_3: # %endblock
+; CHECK-NEXT:    cmpwi 3, 1
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    li 4, 1
+; CHECK-NEXT:    isel 3, 4, 3, 0
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB3_4:
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    b .LBB3_3
   %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
   %not.cmp = icmp slt i32 %call, 1
   %. = zext i1 %not.cmp to i32
   ret i32 %.
-
-  ; CHECK-LABEL: @zeroEqualityTest03
-  ; CHECK-LABEL: %res_block
-  ; CHECK: cmpld
-  ; CHECK-NEXT: li [[LI:[0-9]+]], 1
-  ; CHECK-NEXT: li [[LI2:[0-9]+]], -1
-  ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
 }
 
 ; Validate with < 0
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest04() local_unnamed_addr #0 {
-entry:
+define signext i32 @zeroEqualityTest05() {
+; CHECK-LABEL: zeroEqualityTest05:
+; CHECK:       # BB#0: # %loadbb
+; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha
+; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha
+; CHECK-NEXT:    addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l
+; CHECK-NEXT:    addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l
+; CHECK-NEXT:    ldbrx 3, 0, 6
+; CHECK-NEXT:    ldbrx 4, 0, 5
+; CHECK-NEXT:    subf. 7, 4, 3
+; CHECK-NEXT:    bne 0, .LBB4_2
+; CHECK-NEXT:  # BB#1: # %loadbb1
+; CHECK-NEXT:    li 4, 8
+; CHECK-NEXT:    ldbrx 3, 6, 4
+; CHECK-NEXT:    ldbrx 4, 5, 4
+; CHECK-NEXT:    subf. 5, 4, 3
+; CHECK-NEXT:    beq 0, .LBB4_4
+; CHECK-NEXT:  .LBB4_2: # %res_block
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    li 12, -1
+; CHECK-NEXT:    isel 3, 12, 3, 0
+; CHECK-NEXT:  .LBB4_3: # %endblock
+; CHECK-NEXT:    srwi 3, 3, 31
+; CHECK-NEXT:    xori 3, 3, 1
+; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB4_4:
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    b .LBB4_3
   %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)
   %call.lobit = lshr i32 %call, 31
   %call.lobit.not = xor i32 %call.lobit, 1
   ret i32 %call.lobit.not
-
-  ; CHECK-LABEL: @zeroEqualityTest04
-  ; CHECK-LABEL: %res_block
-  ; CHECK: cmpld
-  ; CHECK-NEXT: li [[LI:[0-9]+]], 1
-  ; CHECK-NEXT: li [[LI2:[0-9]+]], -1
-  ; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0
 }
 
 ; Validate with memcmp()?:
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest05() local_unnamed_addr #0 {
-entry:
+define signext i32 @equalityFoldTwoConstants() {
+; CHECK-LABEL: equalityFoldTwoConstants:
+; CHECK:       # BB#0: # %loadbb
+; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha
+; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha
+; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l(3)
+; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l(4)
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    bne 0, .LBB5_2
+; CHECK-NEXT:  # BB#1: # %loadbb1
+; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
+; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8
+; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3)
+; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4)
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    beq 0, .LBB5_3
+; CHECK-NEXT:  .LBB5_2: # %res_block
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:  .LBB5_3: # %endblock
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
   %not.tobool = icmp eq i32 %call, 0
   %cond = zext i1 %not.tobool to i32
   ret i32 %cond
-
-  ; CHECK-LABEL: @zeroEqualityTest05
-  ; CHECK-LABEL: %res_block
-  ; CHECK: li 3, 1
-  ; CHECK: li 3, 0
 }
 
-; Validate with !memcmp()?:
-; Function Attrs: nounwind readonly
-define signext i32 @zeroEqualityTest06() local_unnamed_addr #0 {
-entry:
-  %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
-  %not.lnot = icmp ne i32 %call, 0
-  %cond = zext i1 %not.lnot to i32
+define signext i32 @equalityFoldOneConstant(i8* %X) {
+; CHECK-LABEL: equalityFoldOneConstant:
+; CHECK:       # BB#0: # %loadbb
+; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha
+; CHECK-NEXT:    ld 5, 0(3)
+; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer1@toc@l(4)
+; CHECK-NEXT:    cmpld 4, 5
+; CHECK-NEXT:    bne 0, .LBB6_2
+; CHECK-NEXT:  # BB#1: # %loadbb1
+; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
+; CHECK-NEXT:    ld 3, 8(3)
+; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer1@toc@l+8(4)
+; CHECK-NEXT:    cmpld 4, 3
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    beq 0, .LBB6_3
+; CHECK-NEXT:  .LBB6_2: # %res_block
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:  .LBB6_3: # %endblock
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    blr
+  %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* %X, i64 16)
+  %not.tobool = icmp eq i32 %call, 0
+  %cond = zext i1 %not.tobool to i32
   ret i32 %cond
-
-  ; CHECK-LABEL: @zeroEqualityTest06
-  ; CHECK-LABEL: %res_block
-  ; CHECK: li 3, 1
-  ; CHECK-NEXT: clrldi
-  ; CHECK-NEXT: blr
-  ; CHECK: li 3, 0
-  ; CHECK-NEXT: clrldi
-  ; CHECK-NEXT: blr
 }
+
+define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) {
+; CHECK-LABEL: length2_eq_nobuiltin_attr:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:  .Lcfi0:
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:  .Lcfi1:
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    li 5, 2
+; CHECK-NEXT:    bl memcmp
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    cntlzw 3, 3
+; CHECK-NEXT:    rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT:    addi 1, 1, 32
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+  %m = tail call signext i32 @memcmp(i8* %X, i8* %Y, i64 2) nobuiltin
+  %c = icmp eq i32 %m, 0
+  ret i1 %c
+}
+
diff --git a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
index 0e7f8f1bc668..c403b5ac2e5a 100644
--- a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
+++ b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
@@ -37,17 +37,13 @@ entry:
 
 ; CHECK-LABEL: @crbitson
 ; CHECK-NO-ISEL-LABEL: @crbitson
-; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0
-; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0
-; CHECK-DAG: li [[REG2:[0-9]+]], 1
-; CHECK-DAG: crorc [[REG3:[0-9]+]],
-; CHECK: isel 3, 0, [[REG2]], [[REG3]]
-; CHECK-NO-ISEL: bc 12, 20, [[TRUE:.LBB[0-9]+]]
-; CHECK-NO-ISEL-NEXT: blr
-; CHECK-NO-ISEL: [[TRUE]]
-; CHECK-NO-ISEL-NEXT: addi 3, 0, 0
-; CHECK-NO-ISEL-NEXT: blr
-; CHECK: blr
+; CHECK-DAG: cntlzw [[REG1:[0-9]+]], 3
+; CHECK-DAG: cntlzw [[REG2:[0-9]+]], 4
+; CHECK: srwi [[REG3:[0-9]+]], [[REG1]], 5
+; CHECK: srwi [[REG4:[0-9]+]], [[REG2]], 5
+; CHECK: xori [[REG5:[0-9]+]], [[REG3]], 1
+; CHECK: and 3, [[REG5]], [[REG4]]
+; CHECK-NEXT: blr
 }
 
 
diff --git a/test/CodeGen/PowerPC/setcc-logic.ll b/test/CodeGen/PowerPC/setcc-logic.ll
index a5a86f101a94..8a6f4975ec97 100644
--- a/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/test/CodeGen/PowerPC/setcc-logic.ll
@@ -59,8 +59,8 @@ define zeroext i1 @any_bits_set(i32 %P, i32 %Q)  {
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    or 3, 3, 4
 ; CHECK-NEXT:    cntlzw 3, 3
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
 ; CHECK-NEXT:    blr
   %a = icmp ne i32 %P, 0
   %b = icmp ne i32 %Q, 0
@@ -83,10 +83,12 @@ define zeroext i1 @any_sign_bits_set(i32 %P, i32 %Q)  {
 define zeroext i1 @any_bits_clear(i32 %P, i32 %Q)  {
 ; CHECK-LABEL: any_bits_clear:
 ; CHECK:       # BB#0:
+; CHECK-NEXT:    li 5, -1
 ; CHECK-NEXT:    and 3, 3, 4
-; CHECK-NEXT:    li 5, 1
-; CHECK-NEXT:    cmpwi 0, 3, -1
-; CHECK-NEXT:    isel 3, 0, 5, 2
+; CHECK-NEXT:    xor 3, 3, 5
+; CHECK-NEXT:    cntlzw   3, 3
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
 ; CHECK-NEXT:    blr
   %a = icmp ne i32 %P, -1
   %b = icmp ne i32 %Q, -1
@@ -452,8 +454,8 @@ define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT:    xor 3, 3, 4
 ; CHECK-NEXT:    or 3, 3, 5
 ; CHECK-NEXT:    cntlzw 3, 3
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    rlwinm 3, 3, 27, 31, 31
+; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    xori 3, 3, 1
 ; CHECK-NEXT:    blr
   %cmp1 = icmp ne i32 %a, %b
   %cmp2 = icmp ne i32 %c, %d
diff --git a/test/CodeGen/PowerPC/testComparesinesc.ll b/test/CodeGen/PowerPC/testComparesinesc.ll
new file mode 100644
index 000000000000..e6ade339573b
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesinesc.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i8 0, align 1
+
+define signext i32 @test_inesc(i8 signext %a, i8 signext %b) { ; returns (%a != %b) zero-extended to i32
+; CHECK-LABEL: test_inesc:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %conv2 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv2
+}
+
+define signext i32 @test_inesc_sext(i8 signext %a, i8 signext %b) { ; returns (%a != %b) sign-extended to i32
+; CHECK-LABEL: test_inesc_sext:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_inesc_z(i8 signext %a) { ; returns (%a != 0) zero-extended to i32
+; CHECK-LABEL: test_inesc_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %conv1 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv1
+}
+
+define signext i32 @test_inesc_sext_z(i8 signext %a) { ; returns (%a != 0) sign-extended to i32
+; CHECK-LABEL: test_inesc_sext_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_inesc_store(i8 signext %a, i8 signext %b) { ; stores (%a != %b), zero-extended to i8, into @glob
+; CHECK-LABEL: test_inesc_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %conv3 = zext i1 %cmp to i8 ; false -> 0, true -> 1
+  store i8 %conv3, i8* @glob, align 1
+  ret void
+}
+
+define void @test_inesc_sext_store(i8 signext %a, i8 signext %b) { ; stores (%a != %b), sign-extended to i8, into @glob
+; CHECK-LABEL: test_inesc_sext_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %conv3 = sext i1 %cmp to i8 ; false -> 0, true -> -1
+  store i8 %conv3, i8* @glob, align 1
+  ret void
+}
+
+define void @test_inesc_z_store(i8 signext %a) { ; stores (%a != 0), zero-extended to i8, into @glob
+; CHECK-LABEL: test_inesc_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %conv2 = zext i1 %cmp to i8 ; false -> 0, true -> 1
+  store i8 %conv2, i8* @glob, align 1
+  ret void
+}
+
+define void @test_inesc_sext_z_store(i8 signext %a) { ; stores (%a != 0), sign-extended to i8, into @glob
+; CHECK-LABEL: test_inesc_sext_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %conv2 = sext i1 %cmp to i8 ; false -> 0, true -> -1
+  store i8 %conv2, i8* @glob, align 1
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesinesi.ll b/test/CodeGen/PowerPC/testComparesinesi.ll
new file mode 100644
index 000000000000..ad9431c09e33
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesinesi.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i32 0, align 4
+
+define signext i32 @test_inesi(i32 signext %a, i32 signext %b) { ; returns (%a != %b) zero-extended to i32
+; CHECK-LABEL: test_inesi:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_inesi_sext(i32 signext %a, i32 signext %b) { ; returns (%a != %b) sign-extended to i32
+; CHECK-LABEL: test_inesi_sext:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_inesi_z(i32 signext %a) { ; returns (%a != 0) zero-extended to i32
+; CHECK-LABEL: test_inesi_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_inesi_sext_z(i32 signext %a) { ; returns (%a != 0) sign-extended to i32
+; CHECK-LABEL: test_inesi_sext_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_inesi_store(i32 signext %a, i32 signext %b) { ; stores (%a != %b), zero-extended to i32, into @glob
+; CHECK-LABEL: test_inesi_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  store i32 %conv, i32* @glob, align 4
+  ret void
+}
+
+define void @test_inesi_sext_store(i32 signext %a, i32 signext %b) { ; stores (%a != %b), sign-extended to i32, into @glob
+; CHECK-LABEL: test_inesi_sext_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  store i32 %sub, i32* @glob, align 4
+  ret void
+}
+
+define void @test_inesi_z_store(i32 signext %a) { ; stores (%a != 0), zero-extended to i32, into @glob
+; CHECK-LABEL: test_inesi_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  store i32 %conv, i32* @glob, align 4
+  ret void
+}
+
+define void @test_inesi_sext_z_store(i32 signext %a) { ; stores (%a != 0), sign-extended to i32, into @glob
+; CHECK-LABEL: test_inesi_sext_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  store i32 %sub, i32* @glob, align 4
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesinesll.ll b/test/CodeGen/PowerPC/testComparesinesll.ll
new file mode 100644
index 000000000000..9e9369455857
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesinesll.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define signext i32 @test_inesll(i64 %a, i64 %b) { ; returns (%a != %b) on i64 operands, zero-extended to i32
+; CHECK-LABEL: test_inesll:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_inesll_sext(i64 %a, i64 %b) { ; returns (%a != %b) on i64 operands, sign-extended to i32
+; CHECK-LABEL: test_inesll_sext:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_inesll_z(i64 %a) { ; returns (%a != 0) on an i64 operand, zero-extended to i32
+; CHECK-LABEL: test_inesll_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_inesll_sext_z(i64 %a) { ; returns (%a != 0) on an i64 operand, sign-extended to i32
+; CHECK-LABEL: test_inesll_sext_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_inesll_store(i64 %a, i64 %b) { ; stores (%a != %b), zero-extended to i64, into @glob
+; CHECK-LABEL: test_inesll_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = zext i1 %cmp to i64 ; false -> 0, true -> 1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_inesll_sext_store(i64 %a, i64 %b) { ; stores (%a != %b), sign-extended to i64, into @glob
+; CHECK-LABEL: test_inesll_sext_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = sext i1 %cmp to i64 ; false -> 0, true -> -1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_inesll_z_store(i64 %a) { ; stores (%a != 0), zero-extended to i64, into @glob
+; CHECK-LABEL: test_inesll_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = zext i1 %cmp to i64 ; false -> 0, true -> 1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_inesll_sext_z_store(i64 %a) { ; stores (%a != 0), sign-extended to i64, into @glob
+; CHECK-LABEL: test_inesll_sext_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = sext i1 %cmp to i64 ; false -> 0, true -> -1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesiness.ll b/test/CodeGen/PowerPC/testComparesiness.ll
new file mode 100644
index 000000000000..56b7a6ab3974
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesiness.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i16 0, align 2
+
+define signext i32 @test_iness(i16 signext %a, i16 signext %b) { ; returns (%a != %b) zero-extended to i32
+; CHECK-LABEL: test_iness:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %conv2 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv2
+}
+
+define signext i32 @test_iness_sext(i16 signext %a, i16 signext %b) { ; returns (%a != %b) sign-extended to i32
+; CHECK-LABEL: test_iness_sext:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_iness_z(i16 signext %a) { ; returns (%a != 0) zero-extended to i32
+; CHECK-LABEL: test_iness_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %conv1 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv1
+}
+
+define signext i32 @test_iness_sext_z(i16 signext %a) { ; returns (%a != 0) sign-extended to i32
+; CHECK-LABEL: test_iness_sext_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_iness_store(i16 signext %a, i16 signext %b) { ; stores (%a != %b), zero-extended to i16, into @glob
+; CHECK-LABEL: test_iness_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %conv3 = zext i1 %cmp to i16 ; false -> 0, true -> 1
+  store i16 %conv3, i16* @glob, align 2
+  ret void
+}
+
+define void @test_iness_sext_store(i16 signext %a, i16 signext %b) { ; stores (%a != %b), sign-extended to i16, into @glob
+; CHECK-LABEL: test_iness_sext_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %conv3 = sext i1 %cmp to i16 ; false -> 0, true -> -1
+  store i16 %conv3, i16* @glob, align 2
+  ret void
+}
+
+define void @test_iness_z_store(i16 signext %a) { ; stores (%a != 0), zero-extended to i16, into @glob
+; CHECK-LABEL: test_iness_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %conv2 = zext i1 %cmp to i16 ; false -> 0, true -> 1
+  store i16 %conv2, i16* @glob, align 2
+  ret void
+}
+
+define void @test_iness_sext_z_store(i16 signext %a) { ; stores (%a != 0), sign-extended to i16, into @glob
+; CHECK-LABEL: test_iness_sext_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %conv2 = sext i1 %cmp to i16 ; false -> 0, true -> -1
+  store i16 %conv2, i16* @glob, align 2
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineuc.ll b/test/CodeGen/PowerPC/testComparesineuc.ll
new file mode 100644
index 000000000000..1cba13f12292
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineuc.ll
@@ -0,0 +1,136 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+@glob = common local_unnamed_addr global i8 0, align 1
+
+define signext i32 @test_ineuc(i8 zeroext %a, i8 zeroext %b) { ; returns (%a != %b) zero-extended to i32
+; CHECK-LABEL: test_ineuc:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %conv2 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv2
+}
+
+define signext i32 @test_ineuc_sext(i8 zeroext %a, i8 zeroext %b) { ; returns (%a != %b) sign-extended to i32
+; CHECK-LABEL: test_ineuc_sext:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_ineuc_z(i8 zeroext %a) { ; returns (%a != 0) zero-extended to i32
+; CHECK-LABEL: test_ineuc_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %conv1 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv1
+}
+
+define signext i32 @test_ineuc_sext_z(i8 zeroext %a) { ; returns (%a != 0) sign-extended to i32
+; CHECK-LABEL: test_ineuc_sext_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_ineuc_store(i8 zeroext %a, i8 zeroext %b) { ; stores (%a != %b), zero-extended to i8, into @glob
+; CHECK-LABEL: test_ineuc_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %conv3 = zext i1 %cmp to i8 ; false -> 0, true -> 1
+  store i8 %conv3, i8* @glob, align 1
+  ret void
+}
+
+define void @test_ineuc_sext_store(i8 zeroext %a, i8 zeroext %b) { ; stores (%a != %b), sign-extended to i8, into @glob
+; CHECK-LABEL: test_ineuc_sext_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, %b
+  %conv3 = sext i1 %cmp to i8 ; false -> 0, true -> -1
+  store i8 %conv3, i8* @glob, align 1
+  ret void
+}
+
+define void @test_ineuc_z_store(i8 zeroext %a) { ; stores (%a != 0), zero-extended to i8, into @glob
+; CHECK-LABEL: test_ineuc_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %conv2 = zext i1 %cmp to i8 ; false -> 0, true -> 1
+  store i8 %conv2, i8* @glob, align 1
+  ret void
+}
+
+define void @test_ineuc_sext_z_store(i8 zeroext %a) { ; stores (%a != 0), sign-extended to i8, into @glob
+; CHECK-LABEL: test_ineuc_sext_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i8 %a, 0
+  %conv2 = sext i1 %cmp to i8 ; false -> 0, true -> -1
+  store i8 %conv2, i8* @glob, align 1
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineui.ll b/test/CodeGen/PowerPC/testComparesineui.ll
new file mode 100644
index 000000000000..36899b7ea8e1
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineui.ll
@@ -0,0 +1,121 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i32 0, align 4
+
+define signext i32 @test_ineui(i32 zeroext %a, i32 zeroext %b) { ; returns (%a != %b) zero-extended to i32
+; CHECK-LABEL: test_ineui:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_ineui_sext(i32 zeroext %a, i32 zeroext %b) { ; returns (%a != %b) sign-extended to i32
+; CHECK-LABEL: test_ineui_sext:
+; CHECK:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_ineui_z(i32 zeroext %a) { ; returns (%a != 0) zero-extended to i32
+; CHECK-LABEL: test_ineui_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_ineui_sext_z(i32 zeroext %a) { ; returns (%a != 0) sign-extended to i32
+; CHECK-LABEL: test_ineui_sext_z:
+; CHECK:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_ineui_store(i32 zeroext %a, i32 zeroext %b) { ; stores (%a != %b), zero-extended to i32, into @glob
+; CHECK-LABEL: test_ineui_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  store i32 %conv, i32* @glob, align 4
+  ret void
+}
+
+define void @test_ineui_sext_store(i32 zeroext %a, i32 zeroext %b) { ; stores (%a != %b), sign-extended to i32, into @glob
+; CHECK-LABEL: test_ineui_sext_store:
+; CHECK:    xor r3, r3, r4
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  store i32 %sub, i32* @glob, align 4
+  ret void
+}
+
+define void @test_ineui_z_store(i32 zeroext %a) { ; stores (%a != 0), zero-extended to i32, into @glob
+; CHECK-LABEL: test_ineui_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  store i32 %conv, i32* @glob, align 4
+  ret void
+}
+
+define void @test_ineui_sext_z_store(i32 zeroext %a) { ; stores (%a != 0), sign-extended to i32, into @glob
+; CHECK-LABEL: test_ineui_sext_z_store:
+; CHECK:    cntlzw r3, r3
+; CHECK:    srwi r3, r3, 5
+; CHECK:    xori r3, r3, 1
+; CHECK:    neg r3, r3
+; CHECK:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i32 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  store i32 %sub, i32* @glob, align 4
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineull.ll b/test/CodeGen/PowerPC/testComparesineull.ll
new file mode 100644
index 000000000000..7f0fed15157c
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineull.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define signext i32 @test_ineull(i64 %a, i64 %b) { ; returns (%a != %b) on i64 operands, zero-extended to i32
+; CHECK-LABEL: test_ineull:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_ineull_sext(i64 %a, i64 %b) { ; returns (%a != %b) on i64 operands, sign-extended to i32
+; CHECK-LABEL: test_ineull_sext:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_ineull_z(i64 %a) { ; returns (%a != 0) on an i64 operand, zero-extended to i32
+; CHECK-LABEL: test_ineull_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv
+}
+
+define signext i32 @test_ineull_sext_z(i64 %a) { ; returns (%a != 0) on an i64 operand, sign-extended to i32
+; CHECK-LABEL: test_ineull_sext_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_ineull_store(i64 %a, i64 %b) { ; stores (%a != %b), zero-extended to i64, into @glob
+; CHECK-LABEL: test_ineull_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = zext i1 %cmp to i64 ; false -> 0, true -> 1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_ineull_sext_store(i64 %a, i64 %b) { ; stores (%a != %b), sign-extended to i64, into @glob
+; CHECK-LABEL: test_ineull_sext_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = sext i1 %cmp to i64 ; false -> 0, true -> -1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_ineull_z_store(i64 %a) { ; stores (%a != 0), zero-extended to i64, into @glob
+; CHECK-LABEL: test_ineull_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = zext i1 %cmp to i64 ; false -> 0, true -> 1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_ineull_sext_z_store(i64 %a) { ; stores (%a != 0), sign-extended to i64, into @glob
+; CHECK-LABEL: test_ineull_sext_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = sext i1 %cmp to i64 ; false -> 0, true -> -1
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesineus.ll b/test/CodeGen/PowerPC/testComparesineus.ll
new file mode 100644
index 000000000000..d24d854f31c9
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesineus.ll
@@ -0,0 +1,137 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i16 0, align 2
+
+define signext i32 @test_ineus(i16 zeroext %a, i16 zeroext %b) { ; returns (%a != %b) zero-extended to i32
+; CHECK-LABEL: test_ineus:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %conv2 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv2
+}
+
+define signext i32 @test_ineus_sext(i16 zeroext %a, i16 zeroext %b) { ; returns (%a != %b) sign-extended to i32
+; CHECK-LABEL: test_ineus_sext:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define signext i32 @test_ineus_z(i16 zeroext %a) { ; returns (%a != 0) zero-extended to i32
+; CHECK-LABEL: test_ineus_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %conv1 = zext i1 %cmp to i32 ; false -> 0, true -> 1
+  ret i32 %conv1
+}
+
+define signext i32 @test_ineus_sext_z(i16 zeroext %a) { ; returns (%a != 0) sign-extended to i32
+; CHECK-LABEL: test_ineus_sext_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %sub = sext i1 %cmp to i32 ; false -> 0, true -> -1
+  ret i32 %sub
+}
+
+define void @test_ineus_store(i16 zeroext %a, i16 zeroext %b) { ; stores (%a != %b), zero-extended to i16, into @glob
+; CHECK-LABEL: test_ineus_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %conv3 = zext i1 %cmp to i16 ; false -> 0, true -> 1
+  store i16 %conv3, i16* @glob, align 2
+  ret void
+}
+
+define void @test_ineus_sext_store(i16 zeroext %a, i16 zeroext %b) { ; stores (%a != %b), sign-extended to i16, into @glob
+; CHECK-LABEL: test_ineus_sext_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r5)
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, %b
+  %conv3 = sext i1 %cmp to i16 ; false -> 0, true -> -1
+  store i16 %conv3, i16* @glob, align 2
+  ret void
+}
+
+define void @test_ineus_z_store(i16 zeroext %a) { ; stores (%a != 0), zero-extended to i16, into @glob
+; CHECK-LABEL: test_ineus_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %conv2 = zext i1 %cmp to i16 ; false -> 0, true -> 1
+  store i16 %conv2, i16* @glob, align 2
+  ret void
+}
+
+define void @test_ineus_sext_z_store(i16 zeroext %a) { ; stores (%a != 0), sign-extended to i16, into @glob
+; CHECK-LABEL: test_ineus_sext_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    cntlzw r3, r3
+; CHECK-NEXT:    srwi r3, r3, 5
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    xori r3, r3, 1
+; CHECK-NEXT:    neg r3, r3
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i16 %a, 0
+  %conv2 = sext i1 %cmp to i16 ; false -> 0, true -> -1
+  store i16 %conv2, i16* @glob, align 2
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesllnesll.ll b/test/CodeGen/PowerPC/testComparesllnesll.ll
new file mode 100644
index 000000000000..d87ff55739fc
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesllnesll.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define i64 @test_llnesll(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llnesll:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define i64 @test_llnesll_sext(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llnesll_sext:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = sext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define i64 @test_llnesll_z(i64 %a) {
+; CHECK-LABEL: test_llnesll_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define i64 @test_llnesll_sext_z(i64 %a) {
+; CHECK-LABEL: test_llnesll_sext_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = sext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define void @test_llnesll_store(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llnesll_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = zext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_llnesll_sext_store(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llnesll_sext_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = sext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_llnesll_z_store(i64 %a) {
+; CHECK-LABEL: test_llnesll_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = zext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_llnesll_sext_z_store(i64 %a) {
+; CHECK-LABEL: test_llnesll_sext_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = sext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/testComparesllneull.ll b/test/CodeGen/PowerPC/testComparesllneull.ll
new file mode 100644
index 000000000000..7309d5899068
--- /dev/null
+++ b/test/CodeGen/PowerPC/testComparesllneull.ll
@@ -0,0 +1,125 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+@glob = common local_unnamed_addr global i64 0, align 8
+
+define i64 @test_llneull(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llneull:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define i64 @test_llneull_sext(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llneull_sext:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = sext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define i64 @test_llneull_z(i64 %a) {
+; CHECK-LABEL: test_llneull_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addic r4, r3, -1
+; CHECK-NEXT:    subfe r3, r4, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define i64 @test_llneull_sext_z(i64 %a) {
+; CHECK-LABEL: test_llneull_sext_z:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = sext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+define void @test_llneull_store(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llneull_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = zext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_llneull_sext_store(i64 %a, i64 %b) {
+; CHECK-LABEL: test_llneull_sext_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r5, r2, .LC0@toc@ha
+; CHECK-NEXT:    xor r3, r3, r4
+; CHECK-NEXT:    ld r12, .LC0@toc@l(r5)
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r12)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, %b
+  %conv1 = sext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_llneull_z_store(i64 %a) {
+; CHECK-LABEL: test_llneull_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    addic r5, r3, -1
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r5, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = zext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
+
+define void @test_llneull_sext_z_store(i64 %a) {
+; CHECK-LABEL: test_llneull_sext_z_store:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    subfic r3, r3, 0
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    subfe r3, r3, r3
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %cmp = icmp ne i64 %a, 0
+  %conv1 = sext i1 %cmp to i64
+  store i64 %conv1, i64* @glob, align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/vec_int_ext.ll b/test/CodeGen/PowerPC/vec_int_ext.ll
new file mode 100644
index 000000000000..9e1218c423b7
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_int_ext.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck %s -check-prefix=PWR9
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define <4 x i32> @vextsb2w(<16 x i8> %a) {
+; PWR9-LABEL: vextsb2w:
+; PWR9:       # BB#0: # %entry
+; PWR9-NEXT:    vextsb2w 2, 2
+; PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sext i8 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 4
+  %conv2 = sext i8 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <16 x i8> %a, i32 8
+  %conv5 = sext i8 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <16 x i8> %a, i32 12
+  %conv8 = sext i8 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsb2d(<16 x i8> %a) {
+; PWR9-LABEL: vextsb2d:
+; PWR9:       # BB#0: # %entry
+; PWR9-NEXT:    vextsb2d 2, 2
+; PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sext i8 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 8
+  %conv2 = sext i8 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <4 x i32> @vextsh2w(<8 x i16> %a) {
+; PWR9-LABEL: vextsh2w:
+; PWR9:       # BB#0: # %entry
+; PWR9-NEXT:    vextsh2w 2, 2
+; PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sext i16 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 2
+  %conv2 = sext i16 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <8 x i16> %a, i32 4
+  %conv5 = sext i16 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <8 x i16> %a, i32 6
+  %conv8 = sext i16 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsh2d(<8 x i16> %a) {
+; PWR9-LABEL: vextsh2d:
+; PWR9:       # BB#0: # %entry
+; PWR9-NEXT:    vextsh2d 2, 2
+; PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sext i16 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 4
+  %conv2 = sext i16 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <2 x i64> @vextsw2d(<4 x i32> %a) {
+; PWR9-LABEL: vextsw2d:
+; PWR9:       # BB#0: # %entry
+; PWR9-NEXT:    vextsw2d 2, 2
+; PWR9-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = sext i32 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x i32> %a, i32 2
+  %conv2 = sext i32 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index b1deb2c5f567..e04d10c9d64a 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \
-; RUN:     grep "asm-printer" | grep 35
+; RUN:     grep "asm-printer" | grep 33
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
deleted file mode 100644
index 6c60aed67a7b..000000000000
--- a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc -mtriple i386 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
-; RUN: llc -mtriple x86_64 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
-
-define void @test_void_return() {
-; CHECK-LABEL: name:            test_void_return
-; CHECK:      alignment:       4
-; CHECK-NEXT: exposesReturnsTwice: false
-; CHECK-NEXT: legalized:       false
-; CHECK-NEXT: regBankSelected: false
-; CHECK-NEXT: selected:        false
-; CHECK-NEXT: tracksRegLiveness: true
-; CHECK-NEXT: frameInfo:
-; CHECK-NEXT:   isFrameAddressTaken: false
-; CHECK-NEXT:   isReturnAddressTaken: false
-; CHECK-NEXT:   hasStackMap:     false
-; CHECK-NEXT:   hasPatchPoint:   false
-; CHECK-NEXT:   stackSize:       0
-; CHECK-NEXT:   offsetAdjustment: 0
-; CHECK-NEXT:   maxAlignment:    0
-; CHECK-NEXT:   adjustsStack:    false
-; CHECK-NEXT:   hasCalls:        false
-; CHECK-NEXT:   hasOpaqueSPAdjustment: false
-; CHECK-NEXT:   hasVAStart:      false
-; CHECK-NEXT:   hasMustTailInVarArgFunc: false
-; CHECK-NEXT: body:
-; CHECK-NEXT:   bb.1.entry:
-; CHECK-NEXT:     RET 0
-entry:
-  ret void
-}
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index 8ea3e4f9d739..00aa7cf84e55 100644
--- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=i386-linux-gnu   -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc -mtriple=i386-linux-gnu   -mattr=+sse2 -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: llc -mtriple=x86_64-linux-gnu              -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
 
 @a1_8bit = external global i8
 @a7_8bit = external global i8
@@ -11,8 +11,8 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
 ; ALL-LABEL: name:            test_i8_args_8
 
 ; X64: fixedStack:
-; X64:  id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false
-; X64:  id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false
+; X64:  id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true,
+; X64:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
 ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
 ; X64:      [[ARG1:%[0-9]+]](s8) = COPY %edi
 ; X64-NEXT: %{{[0-9]+}}(s8) = COPY %esi
@@ -26,14 +26,14 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
 ; X64-NEXT: [[ARG8:%[0-9]+]](s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0)
 
 ; X32: fixedStack:
-; X32:  id: [[STACK28:[0-9]+]], offset: 28, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK24:[0-9]+]], offset: 24, size: 1, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK20:[0-9]+]], offset: 20, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK16:[0-9]+]], offset: 16, size: 1, alignment: 16, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK12:[0-9]+]], offset: 12, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK8:[0-9]+]],  offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK4:[0-9]+]],  offset: 4, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK0:[0-9]+]],  offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false }
+; X32:  id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 1, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 1, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 1, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 1, alignment: 16, isImmutable: true,
+; X32:  id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 1, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 1, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
 ; X32:       [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ; X32-NEXT:  [[ARG1:%[0-9]+]](s8) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0)
 ; X32-NEXT:  [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -77,8 +77,8 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
 ; ALL-LABEL: name:            test_i32_args_8
 
 ; X64: fixedStack:
-; X64:  id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false
-; X64:  id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false
+; X64:  id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X64:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
 ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
 ; X64:      [[ARG1:%[0-9]+]](s32) = COPY %edi
 ; X64-NEXT: %{{[0-9]+}}(s32) = COPY %esi
@@ -92,14 +92,14 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
 ; X64-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0)
 
 ; X32: fixedStack:
-; X32:  id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK8:[0-9]+]],  offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK4:[0-9]+]],  offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK0:[0-9]+]],  offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32:  id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true,
+; X32:  id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
 ; X32:       [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ; X32-NEXT:  [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
 ; X32-NEXT:  [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -142,8 +142,8 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
 
 ; ALL-LABEL: name:            test_i64_args_8
 ; X64: fixedStack:
-; X64:  id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false
-; X64:  id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false
+; X64:  id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true,
+; X64:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true,
 ; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9
 ; X64:      [[ARG1:%[0-9]+]](s64) = COPY %rdi
 ; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rsi
@@ -157,22 +157,22 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
 ; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
 
 ; X32: fixedStack:
-; X32:  id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32:  id: [[STACK60:[0-9]+]], type: default, offset: 60, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK56:[0-9]+]], type: default, offset: 56, size: 4, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK52:[0-9]+]], type: default, offset: 52, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK48:[0-9]+]], type: default, offset: 48, size: 4, alignment: 16, isImmutable: true,
+; X32:  id: [[STACK44:[0-9]+]], type: default, offset: 44, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK40:[0-9]+]], type: default, offset: 40, size: 4, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK36:[0-9]+]], type: default, offset: 36, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK32:[0-9]+]], type: default, offset: 32, size: 4, alignment: 16, isImmutable: true,
+; X32:  id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true,
+; X32:  id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
 
 ; X32:      [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ; X32-NEXT: [[ARG1L:%[0-9]+]](s32) = G_LOAD [[ARG1L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
@@ -249,8 +249,8 @@ define float @test_float_args(float %arg1, float %arg2) {
 ; X64-NEXT: RET 0, implicit %xmm0
 
 ; X32: fixedStack:
-; X32:  id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32:  id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
 ; X32:       [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ; X32-NEXT:  [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
 ; X32-NEXT:  [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -270,8 +270,8 @@ define double @test_double_args(double %arg1, double %arg2) {
 ; X64-NEXT: RET 0, implicit %xmm0
 
 ; X32: fixedStack:
-; X32:  id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false }
-; X32:  id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false }
+; X32:  id: [[STACK4:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true,
+; X32:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true,
 ; X32:       [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ; X32-NEXT:  [[ARG1:%[0-9]+]](s64) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
 ; X32-NEXT:  [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -282,6 +282,38 @@ define double @test_double_args(double %arg1, double %arg2) {
   ret double %arg2
 }
 
+define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { ; checks that the value returned in %xmm0 is the copy taken from %xmm1
+; ALL-LABEL: name:            test_v4i32_args
+; ALL: liveins: %xmm0, %xmm1
+; ALL:      [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; ALL-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; ALL-NEXT: %xmm0 = COPY [[ARG2]](<4 x s32>)
+; ALL-NEXT: RET 0, implicit %xmm0
+  ret <4 x i32> %arg2
+}
+
+define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) { ; checks the merge of %xmm0/%xmm1 and that the unmerged halves are what gets returned
+; ALL-LABEL: name:            test_v8i32_args
+; ALL: liveins: %xmm0, %xmm1
+; ALL:      [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; ALL-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; ALL-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>)
+; ALL-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1]](<8 x s32>)
+; ALL-NEXT: %xmm0 = COPY [[RETL]](<4 x s32>)
+; ALL-NEXT: %xmm1 = COPY [[RETH]](<4 x s32>)
+; ALL-NEXT: RET 0, implicit %xmm0, implicit %xmm1
+
+  ret <8 x i32> %arg1
+}
+
+define void @test_void_return() {
+; ALL-LABEL: name:            test_void_return
+; ALL:        bb.1.entry:
+; ALL-NEXT:     RET 0
+entry:
+  ret void
+}
+
 define i32 * @test_memop_i32(i32 * %p1) {
 ; ALL-LABEL:name:            test_memop_i32
 ;X64    liveins: %rdi
@@ -290,7 +322,7 @@ define i32 * @test_memop_i32(i32 * %p1) {
 ;X64-NEXT:  RET 0, implicit %rax
 
 ;X32: fixedStack:
-;X32:  id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+;X32:  id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
 ;X32:         %1(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
 ;X32-NEXT:    %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
 ;X32-NEXT:    %eax = COPY %0(p0)
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
deleted file mode 100644
index 90a05f5fc225..000000000000
--- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
-
-define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
-; X64: name:            test_v4i32_args
-; X64: liveins: %xmm0, %xmm1
-; X64:      [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0
-; X64-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1
-; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>)
-; X64-NEXT: RET 0, implicit %xmm0
-  ret <4 x i32> %arg2
-}
-
-define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
-; X64: name:            test_v8i32_args
-; X64: liveins: %xmm0, %xmm1
-; X64:      [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0
-; X64-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1
-; X64-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>)
-; X64-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>)
-; X64-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>)
-; X64-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>)
-; X64-NEXT: RET 0, implicit %xmm0, implicit %xmm1
-
-  ret <8 x i32> %arg1
-}
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
index 0d66a6384107..682d01e66fa0 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
@@ -24,9 +24,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: _ }
-# CHECK-NEXT:   - { id: 1, class: _ }
-# CHECK-NEXT:   - { id: 2, class: _ }
+# CHECK-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: _ }
-# CHECK-NEXT:   - { id: 1, class: _ }
-# CHECK-NEXT:   - { id: 2, class: _ }
+# CHECK-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -88,9 +88,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: _ }
-# CHECK-NEXT:   - { id: 1, class: _ }
-# CHECK-NEXT:   - { id: 2, class: _ }
+# CHECK-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
index be62832b008a..effd26e9866d 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
@@ -26,9 +26,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -86,9 +86,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
index d99303c3ba3b..5ae8132156d5 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
@@ -26,9 +26,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -86,9 +86,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
index 24eefd30c2ac..71ea313c4c72 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
@@ -28,9 +28,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -58,9 +58,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -88,9 +88,9 @@ alignment:       4
 legalized:       false
 regBankSelected: false
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: _ }
-# ALL-NEXT:   - { id: 1, class: _ }
-# ALL-NEXT:   - { id: 2, class: _ }
+# ALL-NEXT:   - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: _, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
index cc03f3a57f0b..ca238b29c2dd 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
@@ -33,8 +33,8 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_mul_vec256
 # CHECK: registers:
-# CHECK:  - { id: 0, class: vecr }
-# CHECK:  - { id: 1, class: vecr }
+# CHECK:  - { id: 0, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -56,8 +56,8 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_vec256
 # CHECK: registers:
-# CHECK:  - { id: 0, class: vecr }
-# CHECK:  - { id: 1, class: vecr }
+# CHECK:  - { id: 0, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -79,8 +79,8 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_sub_vec256
 # CHECK: registers:
-# CHECK:  - { id: 0, class: vecr }
-# CHECK:  - { id: 1, class: vecr }
+# CHECK:  - { id: 0, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -100,8 +100,8 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:       registers:
-# CHECK-NEXT:    - { id: 0, class: gpr }
-# CHECK-NEXT:    - { id: 1, class: vecr }
+# CHECK-NEXT:    - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:    - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -122,8 +122,8 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:       registers:
-# CHECK-NEXT:    - { id: 0, class: vecr }
-# CHECK-NEXT:    - { id: 1, class: gpr }
+# CHECK-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT:    - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
index 278413ad38ef..c94ecc8e9a8d 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
@@ -33,8 +33,8 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:       registers:
-# CHECK-NEXT:    - { id: 0, class: vecr }
-# CHECK-NEXT:    - { id: 1, class: vecr }
+# CHECK-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT:    - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -53,8 +53,8 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:       registers:
-# CHECK-NEXT:    - { id: 0, class: vecr }
-# CHECK-NEXT:    - { id: 1, class: vecr }
+# CHECK-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT:    - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -73,8 +73,8 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:       registers:
-# CHECK-NEXT:    - { id: 0, class: vecr }
-# CHECK-NEXT:    - { id: 1, class: vecr }
+# CHECK-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT:    - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -93,8 +93,8 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:       registers:
-# CHECK-NEXT:    - { id: 0, class: gpr }
-# CHECK-NEXT:    - { id: 1, class: vecr }
+# CHECK-NEXT:    - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:    - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -115,8 +115,8 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:       registers:
-# CHECK-NEXT:    - { id: 0, class: vecr }
-# CHECK-NEXT:    - { id: 1, class: gpr }
+# CHECK-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT:    - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
index a115d1fa3255..b74e03f0fe79 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
@@ -14,11 +14,11 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
-# CHECK-NEXT:   - { id: 3, class: gpr }
-# CHECK-NEXT:   - { id: 4, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
index 1ea922ee475a..7bcc57aef4ac 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
@@ -145,9 +145,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_i8
 # CHECK: registers:
-# CHECK:  - { id: 0, class: gpr }
-# CHECK:  - { id: 1, class: gpr }
-# CHECK:  - { id: 2, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -172,9 +172,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_i16
 # CHECK: registers:
-# CHECK:  - { id: 0, class: gpr }
-# CHECK:  - { id: 1, class: gpr }
-# CHECK:  - { id: 2, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -199,9 +199,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_i32
 # CHECK: registers:
-# CHECK:  - { id: 0, class: gpr }
-# CHECK:  - { id: 1, class: gpr }
-# CHECK:  - { id: 2, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -226,9 +226,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_i64
 # CHECK: registers:
-# CHECK:  - { id: 0, class: gpr }
-# CHECK:  - { id: 1, class: gpr }
-# CHECK:  - { id: 2, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -253,14 +253,14 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_mul_gpr
 # CHECK: registers:
-# CHECK:  - { id: 0, class: gpr }
-# CHECK:  - { id: 1, class: gpr }
-# CHECK:  - { id: 2, class: gpr }
-# CHECK:  - { id: 3, class: gpr }
-# CHECK:  - { id: 4, class: gpr }
-# CHECK:  - { id: 5, class: gpr }
-# CHECK:  - { id: 6, class: gpr }
-# CHECK:  - { id: 7, class: gpr }
+# CHECK:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 3, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 4, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 5, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 6, class: gpr, preferred-register: '' }
+# CHECK:  - { id: 7, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -292,9 +292,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_float
 # CHECK: registers:
-# CHECK:  - { id: 0, class: vecr }
-# CHECK:  - { id: 1, class: vecr }
-# CHECK:  - { id: 2, class: vecr }
+# CHECK:  - { id: 0, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 1, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 2, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -319,9 +319,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_double
 # CHECK: registers:
-# CHECK:  - { id: 0, class: vecr }
-# CHECK:  - { id: 1, class: vecr }
-# CHECK:  - { id: 2, class: vecr }
+# CHECK:  - { id: 0, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 1, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 2, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -346,9 +346,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_v4i32
 # CHECK: registers:
-# CHECK:  - { id: 0, class: vecr }
-# CHECK:  - { id: 1, class: vecr }
-# CHECK:  - { id: 2, class: vecr }
+# CHECK:  - { id: 0, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 1, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 2, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -373,9 +373,9 @@ selected:        false
 tracksRegLiveness: true
 # CHECK-LABEL: name:            test_add_v4f32
 # CHECK: registers:
-# CHECK:  - { id: 0, class: vecr }
-# CHECK:  - { id: 1, class: vecr }
-# CHECK:  - { id: 2, class: vecr }
+# CHECK:  - { id: 0, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 1, class: vecr, preferred-register: '' }
+# CHECK:  - { id: 2, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -399,8 +399,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_load_i8
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -422,8 +422,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_load_i16
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -445,8 +445,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_load_i32
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -469,8 +469,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_load_i64
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -492,8 +492,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_load_float
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -515,8 +515,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_load_double
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -538,8 +538,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_load_v4i32
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: vecr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: vecr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -561,8 +561,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_store_i32
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -585,8 +585,8 @@ regBankSelected: false
 selected:        false
 # CHECK-LABEL: name:            test_store_i64
 # CHECK: registers:
-# CHECK:   - { id: 0, class: gpr }
-# CHECK:   - { id: 1, class: gpr }
+# CHECK:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK:   - { id: 1, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -610,12 +610,12 @@ selected:        false
 # CHECK-LABEL: name:            test_store_float
 # CHECK: registers:
 
-# FAST-NEXT:    - { id: 0, class: vecr }
-# FAST-NEXT:    - { id: 1, class: gpr }
-# FAST-NEXT:    - { id: 2, class: gpr }
+# FAST-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# FAST-NEXT:    - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT:    - { id: 2, class: gpr, preferred-register: '' }
 
-# GREEDY-NEXT:    - { id: 0, class: vecr }
-# GREEDY-NEXT:    - { id: 1, class: gpr }
+# GREEDY-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# GREEDY-NEXT:    - { id: 1, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -647,12 +647,12 @@ selected:        false
 # CHECK-LABEL: name:            test_store_double
 # CHECK: registers:
 
-# FAST-NEXT:    - { id: 0, class: vecr }
-# FAST-NEXT:    - { id: 1, class: gpr }
-# FAST-NEXT:    - { id: 2, class: gpr }
+# FAST-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# FAST-NEXT:    - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT:    - { id: 2, class: gpr, preferred-register: '' }
 
-# GREEDY-NEXT:    - { id: 0, class: vecr }
-# GREEDY-NEXT:    - { id: 1, class: gpr }
+# GREEDY-NEXT:    - { id: 0, class: vecr, preferred-register: '' }
+# GREEDY-NEXT:    - { id: 1, class: gpr, preferred-register: '' }
 
 registers:
   - { id: 0, class: _ }
@@ -682,10 +682,10 @@ alignment:       4
 legalized:       true
 # CHECK-LABEL: name:            constInt_check
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -706,10 +706,10 @@ alignment:       4
 legalized:       true
 # CHECK-LABEL: name:            trunc_check
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -729,11 +729,11 @@ name:            test_gep
 legalized:       true
 # CHECK-LABEL: name:            test_gep
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gpr }
-# CHECK-NEXT:  - { id: 1, class: gpr }
-# CHECK-NEXT:  - { id: 2, class: gpr }
-# CHECK-NEXT:  - { id: 3, class: gpr }
-# CHECK-NEXT:  - { id: 4, class: gpr }
+# CHECK-NEXT:  - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT:  - { id: 4, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -757,9 +757,9 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -782,9 +782,9 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -807,9 +807,9 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
@@ -832,9 +832,9 @@ alignment:       4
 legalized:       true
 regBankSelected: false
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gpr }
-# CHECK-NEXT:   - { id: 1, class: gpr }
-# CHECK-NEXT:   - { id: 2, class: gpr }
+# CHECK-NEXT:   - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: _ }
   - { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v128.mir b/test/CodeGen/X86/GlobalISel/select-add-v128.mir
index a39702340bc2..4f7b6ec72d52 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v128.mir
@@ -32,19 +32,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128 }
-# AVX512VL-NEXT:     - { id: 1, class: vr128 }
-# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+# AVX512VL-NEXT:     - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -74,19 +74,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128 }
-# AVX512VL-NEXT:     - { id: 1, class: vr128 }
-# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+# AVX512VL-NEXT:     - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -116,19 +116,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128x }
-# AVX512VL-NEXT:     - { id: 1, class: vr128x }
-# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+# AVX512VL-NEXT:     - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -158,19 +158,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128x }
-# AVX512VL-NEXT:     - { id: 1, class: vr128x }
-# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+# AVX512VL-NEXT:     - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v256.mir b/test/CodeGen/X86/GlobalISel/select-add-v256.mir
index 7556c2104124..143fd9422974 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v256.mir
@@ -30,19 +30,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256 }
-# AVX512VL-NEXT:     - { id: 1, class: vr256 }
-# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+# AVX512VL-NEXT:     - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -70,19 +70,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256 }
-# AVX512VL-NEXT:     - { id: 1, class: vr256 }
-# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+# AVX512VL-NEXT:     - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -110,19 +110,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256x }
-# AVX512VL-NEXT:     - { id: 1, class: vr256x }
-# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+# AVX512VL-NEXT:     - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -150,19 +150,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256x }
-# AVX512VL-NEXT:     - { id: 1, class: vr256x }
-# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+# AVX512VL-NEXT:     - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v512.mir b/test/CodeGen/X86/GlobalISel/select-add-v512.mir
index e90be4e996f8..6a0cd32eefd5 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v512.mir
@@ -31,9 +31,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -57,9 +57,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -83,9 +83,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -109,9 +109,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
index 8710aaa61a21..0b864f417367 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-x32.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
@@ -13,16 +13,16 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # X32:      registers:
-# X32-NEXT:   - { id: 0, class: gr32 }
-# X32-NEXT:   - { id: 1, class: gr32 }
-# X32-NEXT:   - { id: 2, class: gr32 }
-# X32-NEXT:   - { id: 3, class: gr32 }
-# X32-NEXT:   - { id: 4, class: gpr }
-# X32-NEXT:   - { id: 5, class: gr32 }
-# X32-NEXT:   - { id: 6, class: gr32 }
-# X32-NEXT:   - { id: 7, class: gr32 }
-# X32-NEXT:   - { id: 8, class: gr32 }
-# X32-NEXT:   - { id: 9, class: gpr }
+# X32-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 4, class: gpr, preferred-register: '' }
+# X32-NEXT:   - { id: 5, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 6, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 7, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 8, class: gr32, preferred-register: '' }
+# X32-NEXT:   - { id: 9, class: gpr, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir
index 7337ce12c395..78e6bb6913a4 100644
--- a/test/CodeGen/X86/GlobalISel/select-add.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add.mir
@@ -51,9 +51,9 @@ name:            test_add_i64
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:  - { id: 0, class: gr64 }
-# ALL-NEXT:  - { id: 1, class: gr64 }
-# ALL-NEXT:  - { id: 2, class: gr64 }
+# ALL-NEXT:  - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT:  - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT:  - { id: 2, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -78,9 +78,9 @@ name:            test_add_i32
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:  - { id: 0, class: gr32 }
-# ALL-NEXT:  - { id: 1, class: gr32 }
-# ALL-NEXT:  - { id: 2, class: gr32 }
+# ALL-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:  - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:  - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -106,9 +106,9 @@ legalized:       true
 regBankSelected: true
 selected:        false
 # ALL:      registers:
-# ALL-NEXT:  - { id: 0, class: gr16 }
-# ALL-NEXT:  - { id: 1, class: gr16 }
-# ALL-NEXT:  - { id: 2, class: gr16 }
+# ALL-NEXT:  - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT:  - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT:  - { id: 2, class: gr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -135,9 +135,9 @@ legalized:       true
 regBankSelected: true
 selected:        false
 # ALL:      registers:
-# ALL-NEXT:  - { id: 0, class: gr8 }
-# ALL-NEXT:  - { id: 1, class: gr8 }
-# ALL-NEXT:  - { id: 2, class: gr8 }
+# ALL-NEXT:  - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT:  - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT:  - { id: 2, class: gr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -165,12 +165,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512F-NEXT:  - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT:  - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT:  - { id: 2, class: fr32 }
-# AVX512ALL-NEXT:  - { id: 0, class: fr32x }
-# AVX512ALL-NEXT:  - { id: 1, class: fr32x }
-# AVX512ALL-NEXT:  - { id: 2, class: fr32x }
+# NO_AVX512F-NEXT:  - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 2, class: fr32, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 2, class: fr32x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -200,12 +200,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512F-NEXT:  - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT:  - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT:  - { id: 2, class: fr64 }
-# AVX512ALL-NEXT:  - { id: 0, class: fr64x }
-# AVX512ALL-NEXT:  - { id: 1, class: fr64x }
-# AVX512ALL-NEXT:  - { id: 2, class: fr64x }
+# NO_AVX512F-NEXT:  - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 2, class: fr64, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 2, class: fr64x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -235,12 +235,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512VL-NEXT:  - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 2, class: vr128 }
-# AVX512VL-NEXT:  - { id: 0, class: vr128x }
-# AVX512VL-NEXT:  - { id: 1, class: vr128x }
-# AVX512VL-NEXT:  - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT:  - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -271,12 +271,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512VL-NEXT:  - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 2, class: vr128 }
-# AVX512VL-NEXT:  - { id: 0, class: vr128x }
-# AVX512VL-NEXT:  - { id: 1, class: vr128x }
-# AVX512VL-NEXT:  - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT:  - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-cmp.mir b/test/CodeGen/X86/GlobalISel/select-cmp.mir
index a92c388c1db9..64c8cb6b823a 100644
--- a/test/CodeGen/X86/GlobalISel/select-cmp.mir
+++ b/test/CodeGen/X86/GlobalISel/select-cmp.mir
@@ -87,11 +87,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr8 }
-# CHECK-NEXT:   - { id: 1, class: gr8 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -124,11 +124,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr16 }
-# CHECK-NEXT:   - { id: 1, class: gr16 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr16, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr16, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -161,11 +161,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr64 }
-# CHECK-NEXT:   - { id: 1, class: gr64 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr64, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -198,11 +198,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -235,11 +235,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -272,11 +272,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -309,11 +309,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -346,11 +346,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -383,11 +383,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -420,11 +420,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -457,11 +457,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -494,11 +494,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -531,11 +531,11 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
-# CHECK-NEXT:   - { id: 2, class: gr8 }
-# CHECK-NEXT:   - { id: 3, class: gr32 }
-# CHECK-NEXT:   - { id: 4, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 4, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-constant.mir b/test/CodeGen/X86/GlobalISel/select-constant.mir
index 162de0264435..7902a5084ce6 100644
--- a/test/CodeGen/X86/GlobalISel/select-constant.mir
+++ b/test/CodeGen/X86/GlobalISel/select-constant.mir
@@ -33,7 +33,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name:            const_i8
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gr8 }
+# CHECK-NEXT:  - { id: 0, class: gr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -52,7 +52,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name:            const_i16
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gr16 }
+# CHECK-NEXT:  - { id: 0, class: gr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -71,7 +71,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name:            const_i32
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gr32 }
+# CHECK-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -90,7 +90,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name:            const_i64
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gr64 }
+# CHECK-NEXT:  - { id: 0, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -110,7 +110,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name:            const_i64_u32
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gr64 }
+# CHECK-NEXT:  - { id: 0, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -129,7 +129,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name:            const_i64_i32
 # CHECK: registers:
-# CHECK-NEXT:  - { id: 0, class: gr64 }
+# CHECK-NEXT:  - { id: 0, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
diff --git a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
index d1a3abfd0f93..edb467b2bf90 100644
--- a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
+++ b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
@@ -25,10 +25,10 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr8 }
-# ALL-NEXT:   - { id: 1, class: gr8 }
-# ALL-NEXT:   - { id: 2, class: gr64 }
-# ALL-NEXT:   - { id: 3, class: gr64 }
+# ALL-NEXT:   - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr64, preferred-register: '' }
+# ALL-NEXT:   - { id: 3, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -57,8 +57,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr8 }
-# ALL-NEXT:   - { id: 1, class: gr64 }
+# ALL-NEXT:   - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -83,8 +83,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr16 }
-# ALL-NEXT:   - { id: 1, class: gr64 }
+# ALL-NEXT:   - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-ext.mir b/test/CodeGen/X86/GlobalISel/select-ext.mir
index dccc20e57100..b52f1f6fa621 100644
--- a/test/CodeGen/X86/GlobalISel/select-ext.mir
+++ b/test/CodeGen/X86/GlobalISel/select-ext.mir
@@ -35,9 +35,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr8 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -63,8 +63,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr8 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -89,8 +89,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr16 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -115,8 +115,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr8 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -141,8 +141,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr16 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-gep.mir b/test/CodeGen/X86/GlobalISel/select-gep.mir
index c8a4dc80cb2c..61c766230035 100644
--- a/test/CodeGen/X86/GlobalISel/select-gep.mir
+++ b/test/CodeGen/X86/GlobalISel/select-gep.mir
@@ -14,9 +14,9 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name:            test_gep_i32
 # CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr64_nosp }
-# CHECK-NEXT: - { id: 2, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr64_nosp, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-inc.mir b/test/CodeGen/X86/GlobalISel/select-inc.mir
index 7a77864091d3..47fe6ef672ba 100644
--- a/test/CodeGen/X86/GlobalISel/select-inc.mir
+++ b/test/CodeGen/X86/GlobalISel/select-inc.mir
@@ -13,10 +13,10 @@ name:            test_add_i8
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:  - { id: 0, class: gr8 }
-# INC-NEXT:  - { id: 1, class: gpr }
-# ADD-NEXT:  - { id: 1, class: gr8 }
-# ALL-NEXT:  - { id: 2, class: gr8 }
+# ALL-NEXT:  - { id: 0, class: gr8, preferred-register: '' }
+# INC-NEXT:  - { id: 1, class: gpr, preferred-register: '' }
+# ADD-NEXT:  - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT:  - { id: 2, class: gr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
index 539520c0b8f5..9128f19b1d24 100644
--- a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
+++ b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
@@ -29,7 +29,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name: const_i32_1
 # CHECK:       registers:
-# CHECK-NEXT:  - { id: 0, class: gr32 }
+# CHECK-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -47,7 +47,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name: const_i32_1_optsize
 # CHECK:       registers:
-# CHECK-NEXT:  - { id: 0, class: gr32 }
+# CHECK-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -65,7 +65,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name: const_i32_1b
 # CHECK:       registers:
-# CHECK-NEXT:  - { id: 0, class: gr32 }
+# CHECK-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
@@ -83,7 +83,7 @@ regBankSelected: true
 selected:        false
 # CHECK-LABEL: name: const_i32_1_optsizeb
 # CHECK:       registers:
-# CHECK-NEXT:  - { id: 0, class: gr32 }
+# CHECK-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
 # CHECK:  body:
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
index 8e6a2771db6e..09f414b48a8a 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
@@ -49,9 +49,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr32 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr8 }
+# ALL-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -79,9 +79,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr32 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr16 }
+# ALL-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -109,9 +109,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr32 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -139,10 +139,10 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr8 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
-# ALL-NEXT:   - { id: 3, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -176,10 +176,10 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr16 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
-# ALL-NEXT:   - { id: 3, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -213,10 +213,10 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr32 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
-# ALL-NEXT:   - { id: 3, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -250,9 +250,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr32 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -280,10 +280,10 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr32 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
-# ALL-NEXT:   - { id: 3, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 3, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
index b57c9b0cca98..6d03d7525d20 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
@@ -91,8 +91,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr8 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr8, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %rdi
@@ -115,8 +115,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr16 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr16, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %rdi
@@ -139,8 +139,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr32 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr32, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %rdi
@@ -163,8 +163,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr64 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %rdi
@@ -187,8 +187,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr32 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr32, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %rdi
@@ -211,9 +211,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:          - { id: 0, class: gr64 }
-# NO_AVX512F:   - { id: 1, class: fr32 }
-# AVX512ALL:    - { id: 1, class: fr32x }
+# ALL:          - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F:   - { id: 1, class: fr32, preferred-register: '' }
+# AVX512ALL:    - { id: 1, class: fr32x, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
 # ALL:       %0 = COPY %rdi
@@ -238,8 +238,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr64 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %rdi
@@ -262,9 +262,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:          - { id: 0, class: gr64 }
-# NO_AVX512F:   - { id: 1, class: fr64 }
-# AVX512ALL:    - { id: 1, class: fr64x }
+# ALL:          - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F:   - { id: 1, class: fr64, preferred-register: '' }
+# AVX512ALL:    - { id: 1, class: fr64x, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
 # ALL:       %0 = COPY %rdi
@@ -289,8 +289,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr32 }
-# ALL:   - { id: 1, class: gr64 }
+# ALL:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %edi
@@ -315,8 +315,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr64 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL:     %0 = COPY %rdi
@@ -341,9 +341,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: fr32x }
-# ALL:   - { id: 1, class: gr64 }
-# ALL:   - { id: 2, class: gr32 }
+# ALL:   - { id: 0, class: fr32x, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
+# ALL:   - { id: 2, class: gr32, preferred-register: '' }
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
@@ -371,9 +371,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# NO_AVX512F:   - { id: 0, class: fr32 }
-# AVX512ALL:    - { id: 0, class: fr32x }
-# ALL:   - { id: 1, class: gr64 }
+# NO_AVX512F:   - { id: 0, class: fr32, preferred-register: '' }
+# AVX512ALL:    - { id: 0, class: fr32x, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
 # ALL:       %0 = COPY %xmm0
@@ -400,9 +400,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: fr64x }
-# ALL:   - { id: 1, class: gr64 }
-# ALL:   - { id: 2, class: gr64 }
+# ALL:   - { id: 0, class: fr64x, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
+# ALL:   - { id: 2, class: gr64, preferred-register: '' }
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
@@ -430,9 +430,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# NO_AVX512F:   - { id: 0, class: fr64 }
-# AVX512ALL:    - { id: 0, class: fr64x }
-# ALL:   - { id: 1, class: gr64 }
+# NO_AVX512F:   - { id: 0, class: fr64, preferred-register: '' }
+# AVX512ALL:    - { id: 0, class: fr64x, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
 # ALL:       %0 = COPY %xmm0
@@ -460,8 +460,8 @@ legalized:       true
 regBankSelected: true
 selected:        false
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr64 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.ptr1)
@@ -483,8 +483,8 @@ legalized:       true
 regBankSelected: true
 selected:        false
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# ALL:   - { id: 1, class: gr64 }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
 # ALL: MOV64mr %0, 1, _, 0, _, %1 :: (store 8 into %ir.ptr1)
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
index ce3f6b91dcf6..08844657e2a2 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
@@ -32,9 +32,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:          - { id: 0, class: gr64 }
-# NO_AVX512F:   - { id: 1, class: vr128 }
-# AVX512ALL:    - { id: 1, class: vr128x }
+# ALL:          - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F:   - { id: 1, class: vr128, preferred-register: '' }
+# AVX512ALL:    - { id: 1, class: vr128x, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
 # ALL:      %0 = COPY %rdi
@@ -60,9 +60,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# ALL:   - { id: 0, class: gr64 }
-# NO_AVX512F:   - { id: 1, class: vr128 }
-# AVX512ALL:    - { id: 1, class: vr128x }
+# ALL:   - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F:   - { id: 1, class: vr128, preferred-register: '' }
+# AVX512ALL:    - { id: 1, class: vr128x, preferred-register: '' }
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
 # ALL:      %0 = COPY %rdi
@@ -88,9 +88,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# NO_AVX512F:   - { id: 0, class: vr128 }
-# AVX512ALL:    - { id: 0, class: vr128x }
-# ALL:   - { id: 1, class: gr64 }
+# NO_AVX512F:   - { id: 0, class: vr128, preferred-register: '' }
+# AVX512ALL:    - { id: 0, class: vr128x, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
 # ALL:       %0 = COPY %xmm0
@@ -118,9 +118,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 registers:
-# NO_AVX512F:   - { id: 0, class: vr128 }
-# AVX512ALL:    - { id: 0, class: vr128x }
-# ALL:   - { id: 1, class: gr64 }
+# NO_AVX512F:   - { id: 0, class: vr128, preferred-register: '' }
+# AVX512ALL:    - { id: 0, class: vr128x, preferred-register: '' }
+# ALL:   - { id: 1, class: gr64, preferred-register: '' }
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
 # ALL:       %0 = COPY %xmm0
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
index b9a7e4a8cc4a..ff371ad9989f 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
@@ -33,12 +33,12 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NO_AVX512F:       registers:
-# NO_AVX512F-NEXT:    - { id: 0, class: gr64 }
-# NO_AVX512F-NEXT:    - { id: 1, class: vr256 }
+# NO_AVX512F-NEXT:    - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F-NEXT:    - { id: 1, class: vr256, preferred-register: '' }
 #
 # AVX512ALL:        registers:
-# AVX512ALL-NEXT:     - { id: 0, class: gr64 }
-# AVX512ALL-NEXT:     - { id: 1, class: vr256x }
+# AVX512ALL-NEXT:     - { id: 0, class: gr64, preferred-register: '' }
+# AVX512ALL-NEXT:     - { id: 1, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
@@ -73,12 +73,12 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NO_AVX512F:       registers:
-# NO_AVX512F-NEXT:    - { id: 0, class: gr64 }
-# NO_AVX512F-NEXT:    - { id: 1, class: vr256 }
+# NO_AVX512F-NEXT:    - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F-NEXT:    - { id: 1, class: vr256, preferred-register: '' }
 #
 # AVX512ALL:        registers:
-# AVX512ALL-NEXT:     - { id: 0, class: gr64 }
-# AVX512ALL-NEXT:     - { id: 1, class: vr256x }
+# AVX512ALL-NEXT:     - { id: 0, class: gr64, preferred-register: '' }
+# AVX512ALL-NEXT:     - { id: 1, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
@@ -113,12 +113,12 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NO_AVX512F:       registers:
-# NO_AVX512F-NEXT:    - { id: 0, class: vr256 }
-# NO_AVX512F-NEXT:    - { id: 1, class: gr64 }
+# NO_AVX512F-NEXT:    - { id: 0, class: vr256, preferred-register: '' }
+# NO_AVX512F-NEXT:    - { id: 1, class: gr64, preferred-register: '' }
 #
 # AVX512ALL:        registers:
-# AVX512ALL-NEXT:     - { id: 0, class: vr256x }
-# AVX512ALL-NEXT:     - { id: 1, class: gr64 }
+# AVX512ALL-NEXT:     - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512ALL-NEXT:     - { id: 1, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
@@ -153,12 +153,12 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NO_AVX512F:       registers:
-# NO_AVX512F-NEXT:    - { id: 0, class: vr256 }
-# NO_AVX512F-NEXT:    - { id: 1, class: gr64 }
+# NO_AVX512F-NEXT:    - { id: 0, class: vr256, preferred-register: '' }
+# NO_AVX512F-NEXT:    - { id: 1, class: gr64, preferred-register: '' }
 #
 # AVX512ALL:        registers:
-# AVX512ALL-NEXT:     - { id: 0, class: vr256x }
-# AVX512ALL-NEXT:     - { id: 1, class: gr64 }
+# AVX512ALL-NEXT:     - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512ALL-NEXT:     - { id: 1, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
index 87978a684d4c..131902d81a00 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
@@ -28,8 +28,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX512F:      registers:
-# AVX512F-NEXT:   - { id: 0, class: gr64 }
-# AVX512F-NEXT:   - { id: 1, class: vr512 }
+# AVX512F-NEXT:   - { id: 0, class: gr64, preferred-register: '' }
+# AVX512F-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
@@ -54,8 +54,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX512F:      registers:
-# AVX512F-NEXT:   - { id: 0, class: gr64 }
-# AVX512F-NEXT:   - { id: 1, class: vr512 }
+# AVX512F-NEXT:   - { id: 0, class: gr64, preferred-register: '' }
+# AVX512F-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: vecr }
@@ -80,8 +80,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX512F:      registers:
-# AVX512F-NEXT:   - { id: 0, class: vr512 }
-# AVX512F-NEXT:   - { id: 1, class: gr64 }
+# AVX512F-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# AVX512F-NEXT:   - { id: 1, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
@@ -106,8 +106,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX512F:      registers:
-# AVX512F-NEXT:   - { id: 0, class: vr512 }
-# AVX512F-NEXT:   - { id: 1, class: gr64 }
+# AVX512F-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# AVX512F-NEXT:   - { id: 1, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
index 34a77acc2d1e..453557c08469 100644
--- a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
@@ -24,9 +24,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr16 }
-# ALL-NEXT:   - { id: 1, class: gr16 }
-# ALL-NEXT:   - { id: 2, class: gr16 }
+# ALL-NEXT:   - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -55,9 +55,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr32 }
-# ALL-NEXT:   - { id: 1, class: gr32 }
-# ALL-NEXT:   - { id: 2, class: gr32 }
+# ALL-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -86,9 +86,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: gr64 }
-# ALL-NEXT:   - { id: 1, class: gr64 }
-# ALL-NEXT:   - { id: 2, class: gr64 }
+# ALL-NEXT:   - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
index 5f8ab1e4f189..d3651ccd1ab9 100644
--- a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
+++ b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
@@ -95,9 +95,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr128 }
-# CHECK-NEXT:   - { id: 1, class: vr128 }
-# CHECK-NEXT:   - { id: 2, class: vr128 }
+# CHECK-NEXT:   - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr128, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -121,9 +121,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr128 }
-# CHECK-NEXT:   - { id: 1, class: vr128 }
-# CHECK-NEXT:   - { id: 2, class: vr128 }
+# CHECK-NEXT:   - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr128, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -147,9 +147,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr128x }
-# CHECK-NEXT:   - { id: 1, class: vr128x }
-# CHECK-NEXT:   - { id: 2, class: vr128x }
+# CHECK-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -173,9 +173,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr128 }
-# CHECK-NEXT:   - { id: 1, class: vr128 }
-# CHECK-NEXT:   - { id: 2, class: vr128 }
+# CHECK-NEXT:   - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr128, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -199,9 +199,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr128 }
-# CHECK-NEXT:   - { id: 1, class: vr128 }
-# CHECK-NEXT:   - { id: 2, class: vr128 }
+# CHECK-NEXT:   - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr128, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -225,9 +225,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr128x }
-# CHECK-NEXT:   - { id: 1, class: vr128x }
-# CHECK-NEXT:   - { id: 2, class: vr128x }
+# CHECK-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -251,9 +251,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr128x }
-# CHECK-NEXT:   - { id: 1, class: vr128x }
-# CHECK-NEXT:   - { id: 2, class: vr128x }
+# CHECK-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -277,9 +277,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr256 }
-# CHECK-NEXT:   - { id: 1, class: vr256 }
-# CHECK-NEXT:   - { id: 2, class: vr256 }
+# CHECK-NEXT:   - { id: 0, class: vr256, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr256, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr256, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -303,9 +303,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr256x }
-# CHECK-NEXT:   - { id: 1, class: vr256x }
-# CHECK-NEXT:   - { id: 2, class: vr256x }
+# CHECK-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -329,9 +329,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr256 }
-# CHECK-NEXT:   - { id: 1, class: vr256 }
-# CHECK-NEXT:   - { id: 2, class: vr256 }
+# CHECK-NEXT:   - { id: 0, class: vr256, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr256, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr256, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -355,9 +355,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr256x }
-# CHECK-NEXT:   - { id: 1, class: vr256x }
-# CHECK-NEXT:   - { id: 2, class: vr256x }
+# CHECK-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -381,9 +381,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr256x }
-# CHECK-NEXT:   - { id: 1, class: vr256x }
-# CHECK-NEXT:   - { id: 2, class: vr256x }
+# CHECK-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -407,9 +407,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr512 }
-# CHECK-NEXT:   - { id: 1, class: vr512 }
-# CHECK-NEXT:   - { id: 2, class: vr512 }
+# CHECK-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -433,9 +433,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr512 }
-# CHECK-NEXT:   - { id: 1, class: vr512 }
-# CHECK-NEXT:   - { id: 2, class: vr512 }
+# CHECK-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -459,9 +459,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: vr512 }
-# CHECK-NEXT:   - { id: 1, class: vr512 }
-# CHECK-NEXT:   - { id: 2, class: vr512 }
+# CHECK-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
index d60d4155e29d..f77879d93009 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
@@ -32,19 +32,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128 }
-# AVX512VL-NEXT:     - { id: 1, class: vr128 }
-# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+# AVX512VL-NEXT:     - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -74,19 +74,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128 }
-# AVX512VL-NEXT:     - { id: 1, class: vr128 }
-# AVX512VL-NEXT:     - { id: 2, class: vr128 }
+# AVX512VL-NEXT:     - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -116,19 +116,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128x }
-# AVX512VL-NEXT:     - { id: 1, class: vr128x }
-# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+# AVX512VL-NEXT:     - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -158,19 +158,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # NOVL:            registers:
-# NOVL-NEXT:         - { id: 0, class: vr128 }
-# NOVL-NEXT:         - { id: 1, class: vr128 }
-# NOVL-NEXT:         - { id: 2, class: vr128 }
+# NOVL-NEXT:         - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT:         - { id: 2, class: vr128, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr128x }
-# AVX512VL-NEXT:     - { id: 1, class: vr128x }
-# AVX512VL-NEXT:     - { id: 2, class: vr128x }
+# AVX512VL-NEXT:     - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr128x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
index fbc44997b4a2..d6bde7fbb691 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
@@ -30,19 +30,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256 }
-# AVX512VL-NEXT:     - { id: 1, class: vr256 }
-# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+# AVX512VL-NEXT:     - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -70,19 +70,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256 }
-# AVX512VL-NEXT:     - { id: 1, class: vr256 }
-# AVX512VL-NEXT:     - { id: 2, class: vr256 }
+# AVX512VL-NEXT:     - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -110,19 +110,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256x }
-# AVX512VL-NEXT:     - { id: 1, class: vr256x }
-# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+# AVX512VL-NEXT:     - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -150,19 +150,19 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # AVX2:            registers:
-# AVX2-NEXT:         - { id: 0, class: vr256 }
-# AVX2-NEXT:         - { id: 1, class: vr256 }
-# AVX2-NEXT:         - { id: 2, class: vr256 }
+# AVX2-NEXT:         - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT:         - { id: 2, class: vr256, preferred-register: '' }
 #
 # AVX512VL:        registers:
-# AVX512VL-NEXT:     - { id: 0, class: vr256x }
-# AVX512VL-NEXT:     - { id: 1, class: vr256x }
-# AVX512VL-NEXT:     - { id: 2, class: vr256x }
+# AVX512VL-NEXT:     - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT:     - { id: 2, class: vr256x, preferred-register: '' }
 #
 # AVX512BWVL:      registers:
-# AVX512BWVL-NEXT:   - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT:   - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT:   - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT:   - { id: 2, class: vr256x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
index dcd05f056949..828a243b2656 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
@@ -31,9 +31,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -57,9 +57,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -83,9 +83,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -109,9 +109,9 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:   - { id: 0, class: vr512 }
-# ALL-NEXT:   - { id: 1, class: vr512 }
-# ALL-NEXT:   - { id: 2, class: vr512 }
+# ALL-NEXT:   - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT:   - { id: 2, class: vr512, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub.mir b/test/CodeGen/X86/GlobalISel/select-sub.mir
index d4db6eec6d80..4768a2d93222 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub.mir
@@ -40,9 +40,9 @@ name:            test_sub_i64
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:  - { id: 0, class: gr64 }
-# ALL-NEXT:  - { id: 1, class: gr64 }
-# ALL-NEXT:  - { id: 2, class: gr64 }
+# ALL-NEXT:  - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT:  - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT:  - { id: 2, class: gr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -66,9 +66,9 @@ name:            test_sub_i32
 legalized:       true
 regBankSelected: true
 # ALL:      registers:
-# ALL-NEXT:  - { id: 0, class: gr32 }
-# ALL-NEXT:  - { id: 1, class: gr32 }
-# ALL-NEXT:  - { id: 2, class: gr32 }
+# ALL-NEXT:  - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT:  - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT:  - { id: 2, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -94,12 +94,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512F-NEXT:  - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT:  - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT:  - { id: 2, class: fr32 }
-# AVX512ALL-NEXT:  - { id: 0, class: fr32x }
-# AVX512ALL-NEXT:  - { id: 1, class: fr32x }
-# AVX512ALL-NEXT:  - { id: 2, class: fr32x }
+# NO_AVX512F-NEXT:  - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 2, class: fr32, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 2, class: fr32x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -128,12 +128,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512F-NEXT:  - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT:  - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT:  - { id: 2, class: fr64 }
-# AVX512ALL-NEXT:  - { id: 0, class: fr64x }
-# AVX512ALL-NEXT:  - { id: 1, class: fr64x }
-# AVX512ALL-NEXT:  - { id: 2, class: fr64x }
+# NO_AVX512F-NEXT:  - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT:  - { id: 2, class: fr64, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT:  - { id: 2, class: fr64x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -161,12 +161,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512VL-NEXT:  - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 2, class: vr128 }
-# AVX512VL-NEXT:  - { id: 0, class: vr128x }
-# AVX512VL-NEXT:  - { id: 1, class: vr128x }
-# AVX512VL-NEXT:  - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT:  - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
@@ -196,12 +196,12 @@ regBankSelected: true
 selected:        false
 tracksRegLiveness: true
 # ALL: registers:
-# NO_AVX512VL-NEXT:  - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT:  - { id: 2, class: vr128 }
-# AVX512VL-NEXT:  - { id: 0, class: vr128x }
-# AVX512VL-NEXT:  - { id: 1, class: vr128x }
-# AVX512VL-NEXT:  - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT:  - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT:  - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT:  - { id: 2, class: vr128x, preferred-register: '' }
 registers:
   - { id: 0, class: vecr }
   - { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-trunc.mir b/test/CodeGen/X86/GlobalISel/select-trunc.mir
index 9b90543d6559..4df585628ddc 100644
--- a/test/CodeGen/X86/GlobalISel/select-trunc.mir
+++ b/test/CodeGen/X86/GlobalISel/select-trunc.mir
@@ -38,8 +38,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr8 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -64,8 +64,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr8 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -90,8 +90,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr32 }
-# CHECK-NEXT:   - { id: 1, class: gr16 }
+# CHECK-NEXT:   - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -116,8 +116,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr64_with_sub_8bit }
-# CHECK-NEXT:   - { id: 1, class: gr8 }
+# CHECK-NEXT:   - { id: 0, class: gr64_with_sub_8bit, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr8, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -142,8 +142,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr64 }
-# CHECK-NEXT:   - { id: 1, class: gr16 }
+# CHECK-NEXT:   - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr16, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
@@ -168,8 +168,8 @@ alignment:       4
 legalized:       true
 regBankSelected: true
 # CHECK:      registers:
-# CHECK-NEXT:   - { id: 0, class: gr64 }
-# CHECK-NEXT:   - { id: 1, class: gr32 }
+# CHECK-NEXT:   - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT:   - { id: 1, class: gr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
index 874e3e379d8e..5e375cc42e01 100644
--- a/test/CodeGen/X86/O0-pipeline.ll
+++ b/test/CodeGen/X86/O0-pipeline.ll
@@ -5,12 +5,12 @@
 ; CHECK-LABEL: Pass Arguments:
 ; CHECK-NEXT: Target Library Information
 ; CHECK-NEXT: Target Pass Configuration
+; CHECK-NEXT: Machine Module Information
 ; CHECK-NEXT: Target Transform Information
 ; CHECK-NEXT: Type-Based Alias Analysis
 ; CHECK-NEXT: Scoped NoAlias Alias Analysis
 ; CHECK-NEXT: Assumption Cache Tracker
 ; CHECK-NEXT: Create Garbage Collector Module Metadata
-; CHECK-NEXT: Machine Module Information
 ; CHECK-NEXT: Machine Branch Probability Analysis
 ; CHECK-NEXT:   ModulePass Manager
 ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll
index ed2df277480e..e79d2e69e347 100644
--- a/test/CodeGen/X86/atom-fixup-lea3.ll
+++ b/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -1,6 +1,8 @@
 ; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
-; CHECK: addl ([[reg:%[a-z]+]])
-; CHECK-NEXT: addl $4, [[reg]]
+; CHECK: addl ({{%[a-z]+}},[[reg:%[a-z]+]],4)
+; CHECK-NEXT: movl
+; CHECK-NEXT: addl 4({{%[a-z]+}},[[reg:%[a-z]+]],4)
+; CHECK-NEXT: incl
 
 ; Test for the FixupLEAs pre-emit pass.
 ; An LEA should NOT be substituted for the ADD instruction
@@ -20,7 +22,7 @@
 ;  return sum;
 ;}
 
-define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 {
+define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %k, i32* nocapture %l, i32* nocapture %m, i32* nocapture %array2) #0 {
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body.lr.ph, label %for.end
@@ -35,6 +37,9 @@ for.body:                                         ; preds = %for.body, %for.body
   %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
   %inc1 = add nsw i32 %j.09, 1
   %arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09
+  store i32 %0, i32* %m, align 4
+  store i32 %sum.010, i32* %m, align 4
+  store i32 %0, i32* %m, align 4
   %1 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %1
   store i32 %add, i32* %m, align 4
diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll
index bb05481e313d..47e95fe31bdf 100644
--- a/test/CodeGen/X86/avx-schedule.ll
+++ b/test/CodeGen/X86/avx-schedule.ll
@@ -910,14 +910,14 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double
 ;
 ; BTVER2-LABEL: test_haddpd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_haddpd:
 ; ZNVER1:       # BB#0:
-; ZNVER1-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; ZNVER1-NEXT:    retq # sched: [4:1.00]
   %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
   %2 = load <4 x double>, <4 x double> *%a2, align 32
@@ -941,14 +941,14 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
 ;
 ; BTVER2-LABEL: test_haddps:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_haddps:
 ; ZNVER1:       # BB#0:
-; ZNVER1-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; ZNVER1-NEXT:    retq # sched: [4:1.00]
   %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
   %2 = load <8 x float>, <8 x float> *%a2, align 32
@@ -972,14 +972,14 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double
 ;
 ; BTVER2-LABEL: test_hsubpd:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_hsubpd:
 ; ZNVER1:       # BB#0:
-; ZNVER1-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; ZNVER1-NEXT:    retq # sched: [4:1.00]
   %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
   %2 = load <4 x double>, <4 x double> *%a2, align 32
@@ -1003,14 +1003,14 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
 ;
 ; BTVER2-LABEL: test_hsubps:
 ; BTVER2:       # BB#0:
-; BTVER2-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_hsubps:
 ; ZNVER1:       # BB#0:
-; ZNVER1-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
 ; ZNVER1-NEXT:    retq # sched: [4:1.00]
   %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
   %2 = load <8 x float>, <8 x float> *%a2, align 32
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 1914b5134bee..91d1f64c6706 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
 
 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcA:
-; CHECK:       ## BB#0: ## %entry
+; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
@@ -14,7 +14,7 @@ entry:
 
 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcB:
-; CHECK:       ## BB#0: ## %entry
+; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -26,7 +26,7 @@ entry:
 
 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcC:
-; CHECK:       ## BB#0: ## %entry
+; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    vmovq %rdi, %xmm0
 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -41,7 +41,7 @@ entry:
 
 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcD:
-; CHECK:       ## BB#0: ## %entry
+; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
@@ -58,20 +58,20 @@ entry:
 ;
 define <8 x float> @funcE() nounwind {
 ; CHECK-LABEL: funcE:
-; CHECK:       ## BB#0: ## %for_exit499
+; CHECK:       # BB#0: # %for_exit499
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    ## implicit-def: %YMM0
+; CHECK-NEXT:    # implicit-def: %YMM0
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne LBB4_2
-; CHECK-NEXT:  ## BB#1: ## %load.i1247
+; CHECK-NEXT:    jne .LBB4_2
+; CHECK-NEXT:  # BB#1: # %load.i1247
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    movq %rsp, %rbp
 ; CHECK-NEXT:    andq $-32, %rsp
-; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
+; CHECK-NEXT:    subq $1312, %rsp # imm = 0x520
 ; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
+; CHECK-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
 ; CHECK-NEXT:    retq
 allocas:
   %udx495 = alloca [18 x [18 x float]], align 32
@@ -99,7 +99,7 @@ __load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_ex
 
 define <8 x float> @funcF(i32 %val) nounwind {
 ; CHECK-LABEL: funcF:
-; CHECK:       ## BB#0:
+; CHECK:       # BB#0:
 ; CHECK-NEXT:    vmovd %edi, %xmm0
 ; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -112,7 +112,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
 
 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcG:
-; CHECK:       ## BB#0: ## %entry
+; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
@@ -123,7 +123,7 @@ entry:
 
 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: funcH:
-; CHECK:       ## BB#0: ## %entry
+; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
 ; CHECK-NEXT:    retq
@@ -134,7 +134,7 @@ entry:
 
 define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
 ; CHECK-LABEL: splat_load_2f64_11:
-; CHECK:       ## BB#0:
+; CHECK:       # BB#0:
 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; CHECK-NEXT:    retq
   %x = load <2 x double>, <2 x double>* %ptr
@@ -144,7 +144,7 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
 
 define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
 ; CHECK-LABEL: splat_load_4f64_2222:
-; CHECK:       ## BB#0:
+; CHECK:       # BB#0:
 ; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
 ; CHECK-NEXT:    retq
   %x = load <4 x double>, <4 x double>* %ptr
@@ -154,7 +154,7 @@ define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
 
 define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
 ; CHECK-LABEL: splat_load_4f32_0000:
-; CHECK:       ## BB#0:
+; CHECK:       # BB#0:
 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
 ; CHECK-NEXT:    retq
   %x = load <4 x float>, <4 x float>* %ptr
@@ -164,7 +164,7 @@ define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
 
 define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
 ; CHECK-LABEL: splat_load_8f32_77777777:
-; CHECK:       ## BB#0:
+; CHECK:       # BB#0:
 ; CHECK-NEXT:    vbroadcastss 28(%rdi), %ymm0
 ; CHECK-NEXT:    retq
   %x = load <8 x float>, <8 x float>* %ptr
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 8f6afa8785d0..140299f5495d 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -1549,8 +1549,6 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
 ; NOVL:       # BB#0:
 ; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; NOVL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; NOVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; NOVL-NEXT:    vpextrb $8, %xmm0, %eax
 ; NOVL-NEXT:    andl $1, %eax
@@ -1579,8 +1577,6 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
 ; NOVL:       # BB#0:
 ; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; NOVL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; NOVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; NOVL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; NOVL-NEXT:    retq
diff --git a/test/CodeGen/X86/build-vector-128.ll b/test/CodeGen/X86/build-vector-128.ll
index 8c3a6790ffa6..c73d7654045e 100644
--- a/test/CodeGen/X86/build-vector-128.ll
+++ b/test/CodeGen/X86/build-vector-128.ll
@@ -41,9 +41,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
 ;
 ; SSE2-64-LABEL: test_buildvector_v4f32:
 ; SSE2-64:       # BB#0:
-; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE2-64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-64-NEXT:    retq
 ;
 ; SSE41-64-LABEL: test_buildvector_v4f32:
@@ -74,13 +74,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
 define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
 ; SSE2-32-LABEL: test_buildvector_v2i64:
 ; SSE2-32:       # BB#0:
-; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-32-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-32-NEXT:    retl
 ;
 ; SSE-64-LABEL: test_buildvector_v2i64:
@@ -126,12 +122,12 @@ define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) {
 ; SSE2-64-LABEL: test_buildvector_v4i32:
 ; SSE2-64:       # BB#0:
 ; SSE2-64-NEXT:    movd %ecx, %xmm0
-; SSE2-64-NEXT:    movd %esi, %xmm1
+; SSE2-64-NEXT:    movd %edx, %xmm1
 ; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-64-NEXT:    movd %edx, %xmm2
+; SSE2-64-NEXT:    movd %esi, %xmm2
 ; SSE2-64-NEXT:    movd %edi, %xmm0
 ; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-64-NEXT:    retq
 ;
 ; SSE41-64-LABEL: test_buildvector_v4i32:
@@ -170,34 +166,34 @@ define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-32-NEXT:    retl
 ;
 ; SSE2-64-LABEL: test_buildvector_v8i16:
 ; SSE2-64:       # BB#0:
-; SSE2-64-NEXT:    movd %ecx, %xmm0
-; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-64-NEXT:    movd %r9d, %xmm1
-; SSE2-64-NEXT:    movd %esi, %xmm2
-; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-64-NEXT:    movd %edx, %xmm1
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-64-NEXT:    movd %r8d, %xmm3
+; SSE2-64-NEXT:    movd %r9d, %xmm0
+; SSE2-64-NEXT:    movd %r8d, %xmm2
+; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-64-NEXT:    movd %ecx, %xmm0
+; SSE2-64-NEXT:    movd %edx, %xmm1
+; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-64-NEXT:    movd %esi, %xmm3
 ; SSE2-64-NEXT:    movd %edi, %xmm0
 ; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-64-NEXT:    retq
 ;
 ; SSE41-32-LABEL: test_buildvector_v8i16:
@@ -267,31 +263,31 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; SSE2-32-NEXT:    retl
 ;
 ; SSE2-64-LABEL: test_buildvector_v16i8:
@@ -299,34 +295,34 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-64-NEXT:    movd %ecx, %xmm0
+; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-64-NEXT:    movd %r9d, %xmm1
-; SSE2-64-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-64-NEXT:    movd %esi, %xmm2
-; SSE2-64-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
 ; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-64-NEXT:    movd %edx, %xmm3
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-64-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-64-NEXT:    movd %r8d, %xmm1
+; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
 ; SSE2-64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-64-NEXT:    movd %r9d, %xmm0
+; SSE2-64-NEXT:    movd %r8d, %xmm2
+; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-64-NEXT:    movd %ecx, %xmm0
+; SSE2-64-NEXT:    movd %edx, %xmm1
+; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-64-NEXT:    movd %esi, %xmm4
 ; SSE2-64-NEXT:    movd %edi, %xmm0
-; SSE2-64-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
 ; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; SSE2-64-NEXT:    retq
 ;
 ; SSE41-32-LABEL: test_buildvector_v16i8:
diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll
index 730376acdc93..cd5abc1373b9 100644
--- a/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/test/CodeGen/X86/buildvec-insertvec.ll
@@ -75,9 +75,9 @@ entry:
 define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) {
 ; SSE2-LABEL: test_buildvector_v4f32_register:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_register:
@@ -102,7 +102,7 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_load:
@@ -126,10 +126,10 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %
 define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) {
 ; SSE2-LABEL: test_buildvector_v4f32_partial_load:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4f32_partial_load:
@@ -150,12 +150,12 @@ define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32
 ; SSE2-LABEL: test_buildvector_v4i32_register:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movd %esi, %xmm1
+; SSE2-NEXT:    movd %edx, %xmm1
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT:    movd %edx, %xmm2
+; SSE2-NEXT:    movd %esi, %xmm2
 ; SSE2-NEXT:    movd %edi, %xmm0
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4i32_register:
@@ -178,7 +178,7 @@ define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) {
 ; SSE2-NEXT:    movd %edi, %xmm0
 ; SSE2-NEXT:    movd %esi, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v4i32_partial:
@@ -228,21 +228,21 @@ define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 %
 define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
 ; SSE2-LABEL: test_buildvector_v8i16_register:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT:    movd %r9d, %xmm1
-; SSE2-NEXT:    movd %esi, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movd %edx, %xmm1
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    movd %r8d, %xmm3
+; SSE2-NEXT:    movd %r9d, %xmm0
+; SSE2-NEXT:    movd %r8d, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    movd %ecx, %xmm0
+; SSE2-NEXT:    movd %edx, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    movd %esi, %xmm3
 ; SSE2-NEXT:    movd %edi, %xmm0
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v8i16_register:
@@ -333,34 +333,34 @@ define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    movd %ecx, %xmm0
+; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    movd %r9d, %xmm1
-; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT:    movd %esi, %xmm2
-; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    movd %edx, %xmm3
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT:    movd %r8d, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    movd %r9d, %xmm0
+; SSE2-NEXT:    movd %r8d, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT:    movd %ecx, %xmm0
+; SSE2-NEXT:    movd %edx, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    movd %esi, %xmm4
 ; SSE2-NEXT:    movd %edi, %xmm0
-; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_buildvector_v16i8_register:
diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 1218b68b1be4..f6d816ec8919 100644
--- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -159,28 +159,7 @@ define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
 define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
 ; SSE-LABEL: _clearupper8xi16a:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pextrw $1, %xmm0, %eax
-; SSE-NEXT:    pextrw $2, %xmm0, %r9d
-; SSE-NEXT:    pextrw $3, %xmm0, %edx
-; SSE-NEXT:    pextrw $4, %xmm0, %r8d
-; SSE-NEXT:    pextrw $5, %xmm0, %edi
-; SSE-NEXT:    pextrw $6, %xmm0, %esi
-; SSE-NEXT:    pextrw $7, %xmm0, %ecx
-; SSE-NEXT:    movd %ecx, %xmm1
-; SSE-NEXT:    movd %edx, %xmm2
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE-NEXT:    movd %edi, %xmm1
-; SSE-NEXT:    movd %eax, %xmm3
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT:    movd %esi, %xmm1
-; SSE-NEXT:    movd %r9d, %xmm2
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE-NEXT:    movd %r8d, %xmm1
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: _clearupper8xi16a:
@@ -225,61 +204,9 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
 define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
 ; SSE-LABEL: _clearupper16xi16a:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pushq %rbp
-; SSE-NEXT:    pushq %r15
-; SSE-NEXT:    pushq %r14
-; SSE-NEXT:    pushq %r12
-; SSE-NEXT:    pushq %rbx
-; SSE-NEXT:    pextrw $1, %xmm0, %edi
-; SSE-NEXT:    pextrw $2, %xmm0, %eax
-; SSE-NEXT:    pextrw $3, %xmm0, %ecx
-; SSE-NEXT:    pextrw $4, %xmm0, %edx
-; SSE-NEXT:    pextrw $5, %xmm0, %esi
-; SSE-NEXT:    pextrw $6, %xmm0, %ebx
-; SSE-NEXT:    pextrw $7, %xmm0, %ebp
-; SSE-NEXT:    pextrw $1, %xmm1, %r10d
-; SSE-NEXT:    pextrw $2, %xmm1, %r9d
-; SSE-NEXT:    pextrw $3, %xmm1, %r14d
-; SSE-NEXT:    pextrw $4, %xmm1, %r8d
-; SSE-NEXT:    pextrw $5, %xmm1, %r15d
-; SSE-NEXT:    pextrw $6, %xmm1, %r11d
-; SSE-NEXT:    pextrw $7, %xmm1, %r12d
-; SSE-NEXT:    movd %ebp, %xmm2
-; SSE-NEXT:    movd %ecx, %xmm3
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT:    movd %esi, %xmm2
-; SSE-NEXT:    movd %edi, %xmm4
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT:    movd %ebx, %xmm2
-; SSE-NEXT:    movd %eax, %xmm3
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT:    movd %edx, %xmm2
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE-NEXT:    pand %xmm2, %xmm0
-; SSE-NEXT:    movd %r12d, %xmm3
-; SSE-NEXT:    movd %r14d, %xmm4
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT:    movd %r15d, %xmm3
-; SSE-NEXT:    movd %r10d, %xmm5
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
-; SSE-NEXT:    movd %r11d, %xmm3
-; SSE-NEXT:    movd %r9d, %xmm4
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT:    movd %r8d, %xmm3
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
-; SSE-NEXT:    pand %xmm2, %xmm1
-; SSE-NEXT:    popq %rbx
-; SSE-NEXT:    popq %r12
-; SSE-NEXT:    popq %r14
-; SSE-NEXT:    popq %r15
-; SSE-NEXT:    popq %rbp
+; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE-NEXT:    andps %xmm2, %xmm0
+; SSE-NEXT:    andps %xmm2, %xmm1
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: _clearupper16xi16a:
@@ -364,10 +291,9 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
@@ -375,31 +301,32 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm1
+; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm2
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm2
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm3
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm4
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -486,10 +413,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
@@ -497,31 +423,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm1
+; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm2
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm2
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm3
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm4
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
@@ -531,10 +458,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm4
+; SSE-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm1
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
@@ -542,31 +468,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm3
+; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm1
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm4
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm1
 ; SSE-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm4
-; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT:    movd %eax, %xmm5
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    movd %eax, %xmm1
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm4
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
 ; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
 ; SSE-NEXT:    movd %eax, %xmm6
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE-NEXT:    pand %xmm2, %xmm1
 ; SSE-NEXT:    retq
 ;
diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll
index 4140721bd5f3..33d001cdc216 100644
--- a/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt8xfloat:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt8xfloat:
@@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt4xdouble:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovapd (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt4xdouble:
@@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt32xi8:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt32xi8:
@@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt16xi16:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt16xi16:
@@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt8xi32:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt8xi32:
@@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt4xi64:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt4xi64:
@@ -957,8 +981,16 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt16xfloat:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm2, %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt16xfloat:
@@ -1003,8 +1035,16 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt8xdouble:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovapd (%rdi), %ymm0
-; AVX1-NEXT:    vmovapd 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm2, %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt8xdouble:
@@ -1049,8 +1089,16 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt64xi8:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm2, %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt64xi8:
@@ -1101,8 +1149,16 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt32xi16:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm2, %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt32xi16:
@@ -1153,8 +1209,16 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt16xi32:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm2, %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt16xi32:
@@ -1199,8 +1263,16 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt8xi64:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm2, %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt8xi64:
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 85b2b41fa191..068480873c23 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,16 +1,10 @@
 ; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck %s
 
 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
-; ATOM: foo
-; ATOM: addl
-; ATOM: addl
-; ATOM: leal
 
 ; CHECK: foo
-; CHECK: addl
-; CHECK: addl
-; CHECK: addl
+; CHECK: incl
 
 entry:
 	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/haddsub-2.ll b/test/CodeGen/X86/haddsub-2.ll
index 4596b83f7bc2..fd023d018031 100644
--- a/test/CodeGen/X86/haddsub-2.ll
+++ b/test/CodeGen/X86/haddsub-2.ll
@@ -142,12 +142,12 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
 ; SSE3-NEXT:    movd %xmm0, %edi
 ; SSE3-NEXT:    addl %eax, %edi
 ; SSE3-NEXT:    movd %edi, %xmm0
-; SSE3-NEXT:    movd %edx, %xmm1
+; SSE3-NEXT:    movd %esi, %xmm1
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT:    movd %esi, %xmm2
+; SSE3-NEXT:    movd %edx, %xmm2
 ; SSE3-NEXT:    movd %ecx, %xmm0
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: phadd_d_test1:
@@ -196,16 +196,16 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
 ; SSE3-NEXT:    movd %xmm0, %esi
 ; SSE3-NEXT:    addl %eax, %esi
 ; SSE3-NEXT:    movd %esi, %xmm0
+; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; SSE3-NEXT:    movd %xmm2, %eax
+; SSE3-NEXT:    movd %xmm1, %esi
+; SSE3-NEXT:    addl %eax, %esi
+; SSE3-NEXT:    movd %esi, %xmm1
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE3-NEXT:    movd %ecx, %xmm2
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %eax
-; SSE3-NEXT:    movd %xmm1, %ecx
-; SSE3-NEXT:    addl %eax, %ecx
-; SSE3-NEXT:    movd %ecx, %xmm1
 ; SSE3-NEXT:    movd %edx, %xmm0
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: phadd_d_test2:
@@ -258,12 +258,12 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
 ; SSE3-NEXT:    movd %xmm0, %edi
 ; SSE3-NEXT:    subl %edi, %esi
 ; SSE3-NEXT:    movd %esi, %xmm0
-; SSE3-NEXT:    movd %ecx, %xmm1
+; SSE3-NEXT:    movd %edx, %xmm1
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT:    movd %edx, %xmm2
+; SSE3-NEXT:    movd %ecx, %xmm2
 ; SSE3-NEXT:    movd %eax, %xmm0
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: phsub_d_test1:
@@ -312,16 +312,16 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
 ; SSE3-NEXT:    movd %xmm0, %esi
 ; SSE3-NEXT:    subl %esi, %edx
 ; SSE3-NEXT:    movd %edx, %xmm0
+; SSE3-NEXT:    movd %xmm1, %edx
+; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE3-NEXT:    movd %xmm1, %esi
+; SSE3-NEXT:    subl %esi, %edx
+; SSE3-NEXT:    movd %edx, %xmm1
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE3-NEXT:    movd %eax, %xmm2
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE3-NEXT:    movd %xmm1, %eax
-; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %edx
-; SSE3-NEXT:    subl %edx, %eax
-; SSE3-NEXT:    movd %eax, %xmm1
 ; SSE3-NEXT:    movd %ecx, %xmm0
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: phsub_d_test2:
@@ -518,19 +518,19 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE3-NEXT:    movd %xmm0, %r9d
 ; SSE3-NEXT:    addl %edx, %r9d
-; SSE3-NEXT:    movd %xmm1, %esi
+; SSE3-NEXT:    movd %xmm1, %edx
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %r10d
-; SSE3-NEXT:    addl %esi, %r10d
-; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE3-NEXT:    movd %xmm0, %esi
+; SSE3-NEXT:    addl %edx, %esi
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE3-NEXT:    movd %xmm0, %edx
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
 ; SSE3-NEXT:    movd %xmm0, %edi
-; SSE3-NEXT:    addl %esi, %edi
+; SSE3-NEXT:    addl %edx, %edi
 ; SSE3-NEXT:    movd %xmm2, %eax
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %r11d
-; SSE3-NEXT:    addl %eax, %r11d
+; SSE3-NEXT:    movd %xmm0, %r10d
+; SSE3-NEXT:    addl %eax, %r10d
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
 ; SSE3-NEXT:    movd %xmm0, %eax
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
@@ -541,24 +541,24 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
 ; SSE3-NEXT:    movd %xmm0, %edx
 ; SSE3-NEXT:    addl %eax, %edx
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
-; SSE3-NEXT:    movd %xmm0, %eax
+; SSE3-NEXT:    movd %xmm0, %r11d
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %esi
-; SSE3-NEXT:    addl %eax, %esi
+; SSE3-NEXT:    movd %xmm0, %eax
+; SSE3-NEXT:    addl %r11d, %eax
 ; SSE3-NEXT:    movd %edi, %xmm0
-; SSE3-NEXT:    movd %r9d, %xmm1
+; SSE3-NEXT:    movd %esi, %xmm1
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT:    movd %r10d, %xmm2
+; SSE3-NEXT:    movd %r9d, %xmm2
 ; SSE3-NEXT:    movd %r8d, %xmm0
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE3-NEXT:    movd %esi, %xmm1
-; SSE3-NEXT:    movd %ecx, %xmm2
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE3-NEXT:    movd %eax, %xmm1
+; SSE3-NEXT:    movd %edx, %xmm2
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT:    movd %edx, %xmm3
-; SSE3-NEXT:    movd %r11d, %xmm1
+; SSE3-NEXT:    movd %ecx, %xmm3
+; SSE3-NEXT:    movd %r10d, %xmm1
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: avx2_vphadd_d_test:
@@ -658,83 +658,83 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) {
 ; SSE3-NEXT:    addl %eax, %ecx
 ; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
 ; SSE3-NEXT:    pextrw $2, %xmm0, %eax
-; SSE3-NEXT:    pextrw $3, %xmm0, %r11d
-; SSE3-NEXT:    addl %eax, %r11d
-; SSE3-NEXT:    pextrw $4, %xmm0, %eax
-; SSE3-NEXT:    pextrw $5, %xmm0, %r10d
-; SSE3-NEXT:    addl %eax, %r10d
-; SSE3-NEXT:    pextrw $6, %xmm0, %eax
-; SSE3-NEXT:    pextrw $7, %xmm0, %r13d
-; SSE3-NEXT:    addl %eax, %r13d
-; SSE3-NEXT:    movd %xmm1, %eax
-; SSE3-NEXT:    pextrw $1, %xmm1, %r14d
-; SSE3-NEXT:    addl %eax, %r14d
-; SSE3-NEXT:    pextrw $2, %xmm1, %eax
-; SSE3-NEXT:    pextrw $3, %xmm1, %ebp
-; SSE3-NEXT:    addl %eax, %ebp
-; SSE3-NEXT:    pextrw $4, %xmm1, %eax
-; SSE3-NEXT:    pextrw $5, %xmm1, %ebx
-; SSE3-NEXT:    addl %eax, %ebx
-; SSE3-NEXT:    pextrw $6, %xmm1, %eax
-; SSE3-NEXT:    pextrw $7, %xmm1, %edx
-; SSE3-NEXT:    addl %eax, %edx
-; SSE3-NEXT:    movd %xmm2, %eax
-; SSE3-NEXT:    pextrw $1, %xmm2, %ecx
+; SSE3-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE3-NEXT:    addl %eax, %ecx
 ; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; SSE3-NEXT:    pextrw $2, %xmm2, %eax
-; SSE3-NEXT:    pextrw $3, %xmm2, %r12d
-; SSE3-NEXT:    addl %eax, %r12d
-; SSE3-NEXT:    pextrw $4, %xmm2, %eax
-; SSE3-NEXT:    pextrw $5, %xmm2, %r15d
+; SSE3-NEXT:    pextrw $4, %xmm0, %eax
+; SSE3-NEXT:    pextrw $5, %xmm0, %r11d
+; SSE3-NEXT:    addl %eax, %r11d
+; SSE3-NEXT:    pextrw $6, %xmm0, %eax
+; SSE3-NEXT:    pextrw $7, %xmm0, %r15d
 ; SSE3-NEXT:    addl %eax, %r15d
-; SSE3-NEXT:    pextrw $6, %xmm2, %eax
-; SSE3-NEXT:    pextrw $7, %xmm2, %r8d
+; SSE3-NEXT:    movd %xmm1, %eax
+; SSE3-NEXT:    pextrw $1, %xmm1, %r13d
+; SSE3-NEXT:    addl %eax, %r13d
+; SSE3-NEXT:    pextrw $2, %xmm1, %eax
+; SSE3-NEXT:    pextrw $3, %xmm1, %ebx
+; SSE3-NEXT:    addl %eax, %ebx
+; SSE3-NEXT:    pextrw $4, %xmm1, %eax
+; SSE3-NEXT:    pextrw $5, %xmm1, %r8d
 ; SSE3-NEXT:    addl %eax, %r8d
-; SSE3-NEXT:    movd %xmm3, %eax
-; SSE3-NEXT:    pextrw $1, %xmm3, %r9d
-; SSE3-NEXT:    addl %eax, %r9d
-; SSE3-NEXT:    pextrw $2, %xmm3, %eax
-; SSE3-NEXT:    pextrw $3, %xmm3, %esi
+; SSE3-NEXT:    pextrw $6, %xmm1, %eax
+; SSE3-NEXT:    pextrw $7, %xmm1, %esi
 ; SSE3-NEXT:    addl %eax, %esi
-; SSE3-NEXT:    pextrw $4, %xmm3, %eax
-; SSE3-NEXT:    pextrw $5, %xmm3, %edi
-; SSE3-NEXT:    addl %eax, %edi
-; SSE3-NEXT:    pextrw $6, %xmm3, %ecx
+; SSE3-NEXT:    movd %xmm2, %eax
+; SSE3-NEXT:    pextrw $1, %xmm2, %r10d
+; SSE3-NEXT:    addl %eax, %r10d
+; SSE3-NEXT:    pextrw $2, %xmm2, %eax
+; SSE3-NEXT:    pextrw $3, %xmm2, %r14d
+; SSE3-NEXT:    addl %eax, %r14d
+; SSE3-NEXT:    pextrw $4, %xmm2, %eax
+; SSE3-NEXT:    pextrw $5, %xmm2, %r12d
+; SSE3-NEXT:    addl %eax, %r12d
+; SSE3-NEXT:    pextrw $6, %xmm2, %eax
+; SSE3-NEXT:    pextrw $7, %xmm2, %r9d
+; SSE3-NEXT:    addl %eax, %r9d
+; SSE3-NEXT:    movd %xmm3, %eax
+; SSE3-NEXT:    pextrw $1, %xmm3, %ebp
+; SSE3-NEXT:    addl %eax, %ebp
+; SSE3-NEXT:    pextrw $2, %xmm3, %edx
+; SSE3-NEXT:    pextrw $3, %xmm3, %edi
+; SSE3-NEXT:    addl %edx, %edi
+; SSE3-NEXT:    pextrw $4, %xmm3, %edx
+; SSE3-NEXT:    pextrw $5, %xmm3, %ecx
+; SSE3-NEXT:    addl %edx, %ecx
+; SSE3-NEXT:    pextrw $6, %xmm3, %edx
 ; SSE3-NEXT:    pextrw $7, %xmm3, %eax
-; SSE3-NEXT:    addl %ecx, %eax
-; SSE3-NEXT:    movd %edx, %xmm8
-; SSE3-NEXT:    movd %r13d, %xmm3
-; SSE3-NEXT:    movd %ebp, %xmm9
-; SSE3-NEXT:    movd %r11d, %xmm4
-; SSE3-NEXT:    movd %ebx, %xmm10
-; SSE3-NEXT:    movd %r10d, %xmm7
-; SSE3-NEXT:    movd %r14d, %xmm11
+; SSE3-NEXT:    addl %edx, %eax
+; SSE3-NEXT:    movd %esi, %xmm8
+; SSE3-NEXT:    movd %r8d, %xmm3
+; SSE3-NEXT:    movd %ebx, %xmm9
+; SSE3-NEXT:    movd %r13d, %xmm4
+; SSE3-NEXT:    movd %r15d, %xmm10
+; SSE3-NEXT:    movd %r11d, %xmm7
+; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm11 # 4-byte Folded Reload
+; SSE3-NEXT:    # xmm11 = mem[0],zero,zero,zero
 ; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
 ; SSE3-NEXT:    # xmm0 = mem[0],zero,zero,zero
 ; SSE3-NEXT:    movd %eax, %xmm12
-; SSE3-NEXT:    movd %r8d, %xmm6
-; SSE3-NEXT:    movd %esi, %xmm13
-; SSE3-NEXT:    movd %r12d, %xmm5
-; SSE3-NEXT:    movd %edi, %xmm14
-; SSE3-NEXT:    movd %r15d, %xmm2
-; SSE3-NEXT:    movd %r9d, %xmm15
-; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
-; SSE3-NEXT:    # xmm1 = mem[0],zero,zero,zero
+; SSE3-NEXT:    movd %ecx, %xmm6
+; SSE3-NEXT:    movd %edi, %xmm13
+; SSE3-NEXT:    movd %ebp, %xmm5
+; SSE3-NEXT:    movd %r9d, %xmm14
+; SSE3-NEXT:    movd %r12d, %xmm2
+; SSE3-NEXT:    movd %r14d, %xmm15
+; SSE3-NEXT:    movd %r10d, %xmm1
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE3-NEXT:    popq %rbx
 ; SSE3-NEXT:    popq %r12
 ; SSE3-NEXT:    popq %r13
@@ -858,12 +858,12 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
 ; SSE-NEXT:    movd %xmm0, %edi
 ; SSE-NEXT:    subl %edi, %esi
 ; SSE-NEXT:    movd %esi, %xmm0
-; SSE-NEXT:    movd %ecx, %xmm1
+; SSE-NEXT:    movd %edx, %xmm1
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    movd %edx, %xmm2
+; SSE-NEXT:    movd %ecx, %xmm2
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: not_a_hsub_1:
@@ -919,11 +919,11 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:    movaps %xmm1, %xmm4
 ; SSE-NEXT:    movhlps {{.*#+}} xmm4 = xmm4[1,1]
 ; SSE-NEXT:    subss %xmm4, %xmm3
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE-NEXT:    movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; SSE-NEXT:    subss %xmm3, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE-NEXT:    subss %xmm4, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: not_a_hsub_2:
@@ -1162,19 +1162,19 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE3-NEXT:    movd %xmm0, %r9d
 ; SSE3-NEXT:    addl %edx, %r9d
-; SSE3-NEXT:    movd %xmm2, %esi
+; SSE3-NEXT:    movd %xmm2, %edx
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %r10d
-; SSE3-NEXT:    addl %esi, %r10d
-; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
 ; SSE3-NEXT:    movd %xmm0, %esi
+; SSE3-NEXT:    addl %edx, %esi
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE3-NEXT:    movd %xmm0, %edx
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
 ; SSE3-NEXT:    movd %xmm0, %edi
-; SSE3-NEXT:    addl %esi, %edi
+; SSE3-NEXT:    addl %edx, %edi
 ; SSE3-NEXT:    movd %xmm1, %eax
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %r11d
-; SSE3-NEXT:    addl %eax, %r11d
+; SSE3-NEXT:    movd %xmm0, %r10d
+; SSE3-NEXT:    addl %eax, %r10d
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE3-NEXT:    movd %xmm0, %eax
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
@@ -1185,24 +1185,24 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
 ; SSE3-NEXT:    movd %xmm0, %edx
 ; SSE3-NEXT:    addl %eax, %edx
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
-; SSE3-NEXT:    movd %xmm0, %eax
+; SSE3-NEXT:    movd %xmm0, %r11d
 ; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
-; SSE3-NEXT:    movd %xmm0, %esi
-; SSE3-NEXT:    addl %eax, %esi
+; SSE3-NEXT:    movd %xmm0, %eax
+; SSE3-NEXT:    addl %r11d, %eax
 ; SSE3-NEXT:    movd %edi, %xmm0
-; SSE3-NEXT:    movd %r9d, %xmm1
+; SSE3-NEXT:    movd %esi, %xmm1
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT:    movd %r10d, %xmm2
+; SSE3-NEXT:    movd %r9d, %xmm2
 ; SSE3-NEXT:    movd %r8d, %xmm0
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE3-NEXT:    movd %esi, %xmm1
-; SSE3-NEXT:    movd %ecx, %xmm2
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE3-NEXT:    movd %eax, %xmm1
+; SSE3-NEXT:    movd %edx, %xmm2
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT:    movd %edx, %xmm3
-; SSE3-NEXT:    movd %r11d, %xmm1
+; SSE3-NEXT:    movd %ecx, %xmm3
+; SSE3-NEXT:    movd %r10d, %xmm1
 ; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: avx2_hadd_d:
@@ -1293,15 +1293,14 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
 ; SSE3-NEXT:  .Lcfi23:
 ; SSE3-NEXT:    .cfi_offset %rbp, -16
 ; SSE3-NEXT:    movd %xmm0, %eax
-; SSE3-NEXT:    pextrw $1, %xmm0, %ecx
-; SSE3-NEXT:    addl %eax, %ecx
-; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT:    pextrw $1, %xmm0, %r10d
+; SSE3-NEXT:    addl %eax, %r10d
 ; SSE3-NEXT:    pextrw $2, %xmm0, %eax
-; SSE3-NEXT:    pextrw $3, %xmm0, %r15d
-; SSE3-NEXT:    addl %eax, %r15d
+; SSE3-NEXT:    pextrw $3, %xmm0, %r11d
+; SSE3-NEXT:    addl %eax, %r11d
 ; SSE3-NEXT:    pextrw $4, %xmm0, %eax
-; SSE3-NEXT:    pextrw $5, %xmm0, %r14d
-; SSE3-NEXT:    addl %eax, %r14d
+; SSE3-NEXT:    pextrw $5, %xmm0, %r12d
+; SSE3-NEXT:    addl %eax, %r12d
 ; SSE3-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE3-NEXT:    pextrw $7, %xmm0, %r13d
 ; SSE3-NEXT:    addl %eax, %r13d
@@ -1310,70 +1309,71 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
 ; SSE3-NEXT:    addl %eax, %ecx
 ; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
 ; SSE3-NEXT:    pextrw $2, %xmm1, %eax
-; SSE3-NEXT:    pextrw $3, %xmm1, %r11d
-; SSE3-NEXT:    addl %eax, %r11d
-; SSE3-NEXT:    pextrw $4, %xmm1, %eax
-; SSE3-NEXT:    pextrw $5, %xmm1, %r10d
-; SSE3-NEXT:    addl %eax, %r10d
-; SSE3-NEXT:    pextrw $6, %xmm1, %eax
-; SSE3-NEXT:    pextrw $7, %xmm1, %r12d
-; SSE3-NEXT:    addl %eax, %r12d
-; SSE3-NEXT:    movd %xmm2, %eax
-; SSE3-NEXT:    pextrw $1, %xmm2, %ebx
-; SSE3-NEXT:    addl %eax, %ebx
-; SSE3-NEXT:    pextrw $2, %xmm2, %eax
-; SSE3-NEXT:    pextrw $3, %xmm2, %ecx
+; SSE3-NEXT:    pextrw $3, %xmm1, %ecx
 ; SSE3-NEXT:    addl %eax, %ecx
+; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT:    pextrw $4, %xmm1, %eax
+; SSE3-NEXT:    pextrw $5, %xmm1, %r14d
+; SSE3-NEXT:    addl %eax, %r14d
+; SSE3-NEXT:    pextrw $6, %xmm1, %esi
+; SSE3-NEXT:    pextrw $7, %xmm1, %r15d
+; SSE3-NEXT:    addl %esi, %r15d
+; SSE3-NEXT:    movd %xmm2, %esi
+; SSE3-NEXT:    pextrw $1, %xmm2, %ebp
+; SSE3-NEXT:    addl %esi, %ebp
+; SSE3-NEXT:    pextrw $2, %xmm2, %esi
+; SSE3-NEXT:    pextrw $3, %xmm2, %edi
+; SSE3-NEXT:    addl %esi, %edi
 ; SSE3-NEXT:    pextrw $4, %xmm2, %esi
-; SSE3-NEXT:    pextrw $5, %xmm2, %r8d
-; SSE3-NEXT:    addl %esi, %r8d
-; SSE3-NEXT:    pextrw $6, %xmm2, %esi
-; SSE3-NEXT:    pextrw $7, %xmm2, %edx
-; SSE3-NEXT:    addl %esi, %edx
-; SSE3-NEXT:    movd %xmm3, %edi
-; SSE3-NEXT:    pextrw $1, %xmm3, %r9d
-; SSE3-NEXT:    addl %edi, %r9d
-; SSE3-NEXT:    pextrw $2, %xmm3, %ebp
-; SSE3-NEXT:    pextrw $3, %xmm3, %edi
-; SSE3-NEXT:    addl %ebp, %edi
-; SSE3-NEXT:    pextrw $4, %xmm3, %eax
-; SSE3-NEXT:    pextrw $5, %xmm3, %ebp
-; SSE3-NEXT:    addl %eax, %ebp
-; SSE3-NEXT:    pextrw $6, %xmm3, %esi
-; SSE3-NEXT:    pextrw $7, %xmm3, %eax
+; SSE3-NEXT:    pextrw $5, %xmm2, %eax
 ; SSE3-NEXT:    addl %esi, %eax
-; SSE3-NEXT:    movd %edx, %xmm8
-; SSE3-NEXT:    movd %r13d, %xmm3
-; SSE3-NEXT:    movd %ecx, %xmm9
-; SSE3-NEXT:    movd %r15d, %xmm4
-; SSE3-NEXT:    movd %r8d, %xmm10
-; SSE3-NEXT:    movd %r14d, %xmm7
-; SSE3-NEXT:    movd %ebx, %xmm11
-; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
-; SSE3-NEXT:    # xmm0 = mem[0],zero,zero,zero
-; SSE3-NEXT:    movd %eax, %xmm12
-; SSE3-NEXT:    movd %r12d, %xmm6
-; SSE3-NEXT:    movd %edi, %xmm13
-; SSE3-NEXT:    movd %r11d, %xmm5
-; SSE3-NEXT:    movd %ebp, %xmm14
-; SSE3-NEXT:    movd %r10d, %xmm2
-; SSE3-NEXT:    movd %r9d, %xmm15
+; SSE3-NEXT:    pextrw $6, %xmm2, %esi
+; SSE3-NEXT:    pextrw $7, %xmm2, %ecx
+; SSE3-NEXT:    addl %esi, %ecx
+; SSE3-NEXT:    movd %xmm3, %ebx
+; SSE3-NEXT:    pextrw $1, %xmm3, %r9d
+; SSE3-NEXT:    addl %ebx, %r9d
+; SSE3-NEXT:    pextrw $2, %xmm3, %edx
+; SSE3-NEXT:    pextrw $3, %xmm3, %ebx
+; SSE3-NEXT:    addl %edx, %ebx
+; SSE3-NEXT:    pextrw $4, %xmm3, %edx
+; SSE3-NEXT:    pextrw $5, %xmm3, %esi
+; SSE3-NEXT:    addl %edx, %esi
+; SSE3-NEXT:    pextrw $6, %xmm3, %r8d
+; SSE3-NEXT:    pextrw $7, %xmm3, %edx
+; SSE3-NEXT:    addl %r8d, %edx
+; SSE3-NEXT:    movd %ecx, %xmm8
+; SSE3-NEXT:    movd %eax, %xmm3
+; SSE3-NEXT:    movd %edi, %xmm9
+; SSE3-NEXT:    movd %ebp, %xmm4
+; SSE3-NEXT:    movd %r13d, %xmm10
+; SSE3-NEXT:    movd %r12d, %xmm7
+; SSE3-NEXT:    movd %r11d, %xmm11
+; SSE3-NEXT:    movd %r10d, %xmm0
+; SSE3-NEXT:    movd %edx, %xmm12
+; SSE3-NEXT:    movd %esi, %xmm6
+; SSE3-NEXT:    movd %ebx, %xmm13
+; SSE3-NEXT:    movd %r9d, %xmm5
+; SSE3-NEXT:    movd %r15d, %xmm14
+; SSE3-NEXT:    movd %r14d, %xmm2
+; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm15 # 4-byte Folded Reload
+; SSE3-NEXT:    # xmm15 = mem[0],zero,zero,zero
 ; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
 ; SSE3-NEXT:    # xmm1 = mem[0],zero,zero,zero
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
 ; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE3-NEXT:    popq %rbx
 ; SSE3-NEXT:    popq %r12
 ; SSE3-NEXT:    popq %r13
diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll
index 6d79d4de5206..091d1a22dbcd 100644
--- a/test/CodeGen/X86/haddsub-undef.ll
+++ b/test/CodeGen/X86/haddsub-undef.ll
@@ -171,9 +171,8 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
 ; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT:    addss %xmm2, %xmm0
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test8_undef:
diff --git a/test/CodeGen/X86/hoist-spill.ll b/test/CodeGen/X86/hoist-spill.ll
index afabf96b12a3..03f558fc3ae2 100644
--- a/test/CodeGen/X86/hoist-spill.ll
+++ b/test/CodeGen/X86/hoist-spill.ll
@@ -3,10 +3,8 @@
 ; Check no spills to the same stack slot after hoisting.
 ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
 ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
 ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
 ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 786534b00d39..56f4161147b4 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -4,16 +4,19 @@
 ; By starting the IV at -64 instead of 0, a cmp is eliminated,
 ; as the flags from the add can be used directly.
 
-; STATIC: movl    $-64, [[ECX:%e..]]
+; STATIC: movl    $-64, [[EAX:%e..]]
 
-; STATIC: movl    [[EAX:%e..]], _state+76([[ECX]])
-; STATIC: addl    $16, [[ECX]]
+; STATIC: movl    %{{.+}}, _state+76([[EAX]])
+; STATIC: addl    $16, [[EAX]]
 ; STATIC: jne
 
-; In PIC mode the symbol can't be folded, so the change-compare-stride
-; trick applies.
+; The same for PIC mode.
 
-; PIC: cmpl $64
+; PIC: movl    $-64, [[EAX:%e..]]
+
+; PIC: movl    %{{.+}}, 76(%{{.+}},[[EAX]])
+; PIC: addl    $16, [[EAX]]
+; PIC: jne
 
 @state = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
 @S = external global [0 x i32]		; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/madd.ll b/test/CodeGen/X86/madd.ll
index af86df510016..7c2bb822c967 100644
--- a/test/CodeGen/X86/madd.ll
+++ b/test/CodeGen/X86/madd.ll
@@ -9,17 +9,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; SSE2:       # BB#0: # %entry
 ; SSE2-NEXT:    movl %edx, %eax
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    xorl %ecx, %ecx
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    .p2align 4, 0x90
 ; SSE2-NEXT:  .LBB0_1: # %vector.body
 ; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT:    movdqu (%rdi), %xmm2
-; SSE2-NEXT:    movdqu (%rsi), %xmm3
+; SSE2-NEXT:    movdqu (%rdi,%rcx,2), %xmm2
+; SSE2-NEXT:    movdqu (%rsi,%rcx,2), %xmm3
 ; SSE2-NEXT:    pmaddwd %xmm2, %xmm3
 ; SSE2-NEXT:    paddd %xmm3, %xmm1
-; SSE2-NEXT:    addq $16, %rsi
-; SSE2-NEXT:    addq $16, %rdi
-; SSE2-NEXT:    addq $-8, %rax
+; SSE2-NEXT:    addq $8, %rcx
+; SSE2-NEXT:    cmpq %rcx, %rax
 ; SSE2-NEXT:    jne .LBB0_1
 ; SSE2-NEXT:  # BB#2: # %middle.block
 ; SSE2-NEXT:    paddd %xmm0, %xmm1
@@ -34,17 +34,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; AVX2:       # BB#0: # %entry
 ; AVX2-NEXT:    movl %edx, %eax
 ; AVX2-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    .p2align 4, 0x90
 ; AVX2-NEXT:  .LBB0_1: # %vector.body
 ; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT:    vmovdqu (%rsi), %xmm2
-; AVX2-NEXT:    vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX2-NEXT:    vmovdqu (%rsi,%rcx,2), %xmm2
+; AVX2-NEXT:    vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm2
 ; AVX2-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT:    addq $16, %rsi
-; AVX2-NEXT:    addq $16, %rdi
-; AVX2-NEXT:    addq $-8, %rax
+; AVX2-NEXT:    addq $8, %rcx
+; AVX2-NEXT:    cmpq %rcx, %rax
 ; AVX2-NEXT:    jne .LBB0_1
 ; AVX2-NEXT:  # BB#2: # %middle.block
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -60,17 +60,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movl %edx, %eax
 ; AVX512-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    xorl %ecx, %ecx
 ; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT:    .p2align 4, 0x90
 ; AVX512-NEXT:  .LBB0_1: # %vector.body
 ; AVX512-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT:    vmovdqu (%rsi), %xmm2
-; AVX512-NEXT:    vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX512-NEXT:    vmovdqu (%rsi,%rcx,2), %xmm2
+; AVX512-NEXT:    vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
 ; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm2
 ; AVX512-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
-; AVX512-NEXT:    addq $16, %rsi
-; AVX512-NEXT:    addq $16, %rdi
-; AVX512-NEXT:    addq $-8, %rax
+; AVX512-NEXT:    addq $8, %rcx
+; AVX512-NEXT:    cmpq %rcx, %rax
 ; AVX512-NEXT:    jne .LBB0_1
 ; AVX512-NEXT:  # BB#2: # %middle.block
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -118,12 +118,13 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; SSE2:       # BB#0: # %entry
 ; SSE2-NEXT:    movl %edx, %eax
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    xorl %ecx, %ecx
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    .p2align 4, 0x90
 ; SSE2-NEXT:  .LBB1_1: # %vector.body
 ; SSE2-NEXT:    # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT:    movdqu (%rdi), %xmm2
-; SSE2-NEXT:    movdqu (%rsi), %xmm3
+; SSE2-NEXT:    movdqu (%rdi,%rcx,2), %xmm2
+; SSE2-NEXT:    movdqu (%rsi,%rcx,2), %xmm3
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    pmulhuw %xmm2, %xmm4
 ; SSE2-NEXT:    pmullw %xmm2, %xmm3
@@ -132,9 +133,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; SSE2-NEXT:    paddd %xmm2, %xmm0
 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
 ; SSE2-NEXT:    paddd %xmm3, %xmm1
-; SSE2-NEXT:    addq $16, %rsi
-; SSE2-NEXT:    addq $16, %rdi
-; SSE2-NEXT:    addq $-8, %rax
+; SSE2-NEXT:    addq $8, %rcx
+; SSE2-NEXT:    cmpq %rcx, %rax
 ; SSE2-NEXT:    jne .LBB1_1
 ; SSE2-NEXT:  # BB#2: # %middle.block
 ; SSE2-NEXT:    paddd %xmm1, %xmm0
@@ -149,6 +149,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX2:       # BB#0: # %entry
 ; AVX2-NEXT:    movl %edx, %eax
 ; AVX2-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    .p2align 4, 0x90
 ; AVX2-NEXT:  .LBB1_1: # %vector.body
 ; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -156,9 +157,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; AVX2-NEXT:    vpmulld %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    addq $16, %rsi
-; AVX2-NEXT:    addq $16, %rdi
-; AVX2-NEXT:    addq $-8, %rax
+; AVX2-NEXT:    addq $8, %rcx
+; AVX2-NEXT:    cmpq %rcx, %rax
 ; AVX2-NEXT:    jne .LBB1_1
 ; AVX2-NEXT:  # BB#2: # %middle.block
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -174,6 +174,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movl %edx, %eax
 ; AVX512-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    xorl %ecx, %ecx
 ; AVX512-NEXT:    .p2align 4, 0x90
 ; AVX512-NEXT:  .LBB1_1: # %vector.body
 ; AVX512-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -181,9 +182,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; AVX512-NEXT:    vpmulld %ymm1, %ymm2, %ymm1
 ; AVX512-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT:    addq $16, %rsi
-; AVX512-NEXT:    addq $16, %rdi
-; AVX512-NEXT:    addq $-8, %rax
+; AVX512-NEXT:    addq $8, %rcx
+; AVX512-NEXT:    cmpq %rcx, %rax
 ; AVX512-NEXT:    jne .LBB1_1
 ; AVX512-NEXT:  # BB#2: # %middle.block
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -231,6 +231,7 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; SSE2:       # BB#0: # %entry
 ; SSE2-NEXT:    movl %edx, %eax
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    xorl %ecx, %ecx
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm3, %xmm3
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
@@ -263,9 +264,8 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
 ; SSE2-NEXT:    psrad $16, %xmm4
 ; SSE2-NEXT:    paddd %xmm4, %xmm2
-; SSE2-NEXT:    addq $16, %rsi
-; SSE2-NEXT:    addq $16, %rdi
-; SSE2-NEXT:    addq $-16, %rax
+; SSE2-NEXT:    addq $16, %rcx
+; SSE2-NEXT:    cmpq %rcx, %rax
 ; SSE2-NEXT:    jne .LBB2_1
 ; SSE2-NEXT:  # BB#2: # %middle.block
 ; SSE2-NEXT:    paddd %xmm3, %xmm0
@@ -282,17 +282,17 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; AVX2:       # BB#0: # %entry
 ; AVX2-NEXT:    movl %edx, %eax
 ; AVX2-NEXT:    vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT:    xorl %ecx, %ecx
 ; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT:    .p2align 4, 0x90
 ; AVX2-NEXT:  .LBB2_1: # %vector.body
 ; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm2
-; AVX2-NEXT:    vpmovsxbw (%rsi), %ymm3
+; AVX2-NEXT:    vpmovsxbw (%rdi,%rcx), %ymm2
+; AVX2-NEXT:    vpmovsxbw (%rsi,%rcx), %ymm3
 ; AVX2-NEXT:    vpmaddwd %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT:    vpaddd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    addq $16, %rsi
-; AVX2-NEXT:    addq $16, %rdi
-; AVX2-NEXT:    addq $-16, %rax
+; AVX2-NEXT:    addq $16, %rcx
+; AVX2-NEXT:    cmpq %rcx, %rax
 ; AVX2-NEXT:    jne .LBB2_1
 ; AVX2-NEXT:  # BB#2: # %middle.block
 ; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
@@ -309,18 +309,18 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; AVX512:       # BB#0: # %entry
 ; AVX512-NEXT:    movl %edx, %eax
 ; AVX512-NEXT:    vpxord %zmm0, %zmm0, %zmm0
+; AVX512-NEXT:    xorl %ecx, %ecx
 ; AVX512-NEXT:    vpxor %ymm1, %ymm1, %ymm1
 ; AVX512-NEXT:    .p2align 4, 0x90
 ; AVX512-NEXT:  .LBB2_1: # %vector.body
 ; AVX512-NEXT:    # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT:    vpmovsxbw (%rdi), %ymm2
-; AVX512-NEXT:    vpmovsxbw (%rsi), %ymm3
+; AVX512-NEXT:    vpmovsxbw (%rdi,%rcx), %ymm2
+; AVX512-NEXT:    vpmovsxbw (%rsi,%rcx), %ymm3
 ; AVX512-NEXT:    vpmaddwd %ymm2, %ymm3, %ymm2
 ; AVX512-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm2
 ; AVX512-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    addq $16, %rsi
-; AVX512-NEXT:    addq $16, %rdi
-; AVX512-NEXT:    addq $-16, %rax
+; AVX512-NEXT:    addq $16, %rcx
+; AVX512-NEXT:    cmpq %rcx, %rax
 ; AVX512-NEXT:    jne .LBB2_1
 ; AVX512-NEXT:  # BB#2: # %middle.block
 ; AVX512-NEXT:    vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 8c0a4d4f1752..61aa05a5270b 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -5,7 +5,7 @@
 
 ; CHECK-LABEL: count_up
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: incq
+; CHECK: addq $8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_up(double* %d, i64 %n) nounwind {
@@ -38,7 +38,7 @@ return:
 
 ; CHECK-LABEL: count_down
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $-8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_down(double* %d, i64 %n) nounwind {
@@ -71,7 +71,7 @@ return:
 
 ; CHECK-LABEL: count_up_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: incq
+; CHECK: addq $8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_up_signed(double* %d, i64 %n) nounwind {
@@ -106,7 +106,7 @@ return:
 
 ; CHECK-LABEL: count_down_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $-8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_down_signed(double* %d, i64 %n) nounwind {
@@ -141,7 +141,7 @@ return:
 
 ; CHECK-LABEL: another_count_up
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_up(double* %d, i64 %n) nounwind {
@@ -174,7 +174,7 @@ return:
 
 ; CHECK-LABEL: another_count_down
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8,
+; CHECK: addq $-8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_down(double* %d, i64 %n) nounwind {
@@ -207,7 +207,7 @@ return:
 
 ; CHECK-LABEL: another_count_up_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_up_signed(double* %d, i64 %n) nounwind {
@@ -242,7 +242,7 @@ return:
 
 ; CHECK-LABEL: another_count_down_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: decq
+; CHECK: addq $-8
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_down_signed(double* %d, i64 %n) nounwind {
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index ce1bb3b06ce5..4e2475b1c67d 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -10,9 +10,28 @@
 
 declare i32 @memcmp(i8*, i8*, i64)
 
-define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length2(i8* %X, i8* %Y) nounwind {
 ; X32-LABEL: length2:
 ; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $2
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length2:
+; X64:       # BB#0:
+; X64-NEXT:    movl $2, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
+  ret i32 %m
+}
+
+define i1 @length2_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length2_eq:
+; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movzwl (%ecx), %ecx
@@ -20,7 +39,7 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length2:
+; X64-LABEL: length2_eq:
 ; X64:       # BB#0:
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    cmpw (%rsi), %ax
@@ -31,8 +50,8 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length2_const:
+define i1 @length2_eq_const(i8* %X) nounwind {
+; X32-LABEL: length2_eq_const:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movzwl (%eax), %eax
@@ -40,7 +59,7 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
 ; X32-NEXT:    setne %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length2_const:
+; X64-LABEL: length2_eq_const:
 ; X64:       # BB#0:
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
@@ -51,8 +70,8 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
-; X32-LABEL: length2_nobuiltin_attr:
+define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length2_eq_nobuiltin_attr:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
 ; X32-NEXT:    pushl $2
@@ -64,7 +83,7 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length2_nobuiltin_attr:
+; X64-LABEL: length2_eq_nobuiltin_attr:
 ; X64:       # BB#0:
 ; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movl $2, %edx
@@ -78,9 +97,74 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length3(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length3:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $3
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length3:
+; X64:       # BB#0:
+; X64-NEXT:    movl $3, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
+  ret i32 %m
+}
+
+define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length3_eq:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $3
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    setne %al
+; X32-NEXT:    retl
+;
+; X64-LABEL: length3_eq:
+; X64:       # BB#0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $3, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
+  %c = icmp ne i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length4(i8* %X, i8* %Y) nounwind {
 ; X32-LABEL: length4:
 ; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length4:
+; X64:       # BB#0:
+; X64-NEXT:    movl $4, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
+  ret i32 %m
+}
+
+define i1 @length4_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length4_eq:
+; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl (%ecx), %ecx
@@ -88,7 +172,7 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
 ; X32-NEXT:    setne %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length4:
+; X64-LABEL: length4_eq:
 ; X64:       # BB#0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    cmpl (%rsi), %eax
@@ -99,15 +183,15 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length4_const:
+define i1 @length4_eq_const(i8* %X) nounwind {
+; X32-LABEL: length4_eq_const:
 ; X32:       # BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    cmpl $875770417, (%eax) # imm = 0x34333231
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length4_const:
+; X64-LABEL: length4_eq_const:
 ; X64:       # BB#0:
 ; X64-NEXT:    cmpl $875770417, (%rdi) # imm = 0x34333231
 ; X64-NEXT:    sete %al
@@ -117,7 +201,53 @@ define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length5(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length5:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $5
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length5:
+; X64:       # BB#0:
+; X64-NEXT:    movl $5, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
+  ret i32 %m
+}
+
+define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length5_eq:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $5
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    setne %al
+; X32-NEXT:    retl
+;
+; X64-LABEL: length5_eq:
+; X64:       # BB#0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $5, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
+  %c = icmp ne i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length8(i8* %X, i8* %Y) nounwind {
 ; X32-LABEL: length8:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
@@ -126,11 +256,30 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
 ; X32-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X32-NEXT:    calll memcmp
 ; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length8:
+; X64:       # BB#0:
+; X64-NEXT:    movl $8, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
+  ret i32 %m
+}
+
+define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length8_eq:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $8
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
 ; X32-NEXT:    testl %eax, %eax
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length8:
+; X64-LABEL: length8_eq:
 ; X64:       # BB#0:
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    cmpq (%rsi), %rax
@@ -141,8 +290,8 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length8_const:
+define i1 @length8_eq_const(i8* %X) nounwind {
+; X32-LABEL: length8_eq_const:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
 ; X32-NEXT:    pushl $8
@@ -154,7 +303,7 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
 ; X32-NEXT:    setne %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length8_const:
+; X64-LABEL: length8_eq_const:
 ; X64:       # BB#0:
 ; X64-NEXT:    movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
 ; X64-NEXT:    cmpq %rax, (%rdi)
@@ -165,7 +314,55 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length16(i8* %x, i8* %y) nounwind {
+define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length12_eq:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $12
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    setne %al
+; X32-NEXT:    retl
+;
+; X64-LABEL: length12_eq:
+; X64:       # BB#0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movl $12, %edx
+; X64-NEXT:    callq memcmp
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
+  %c = icmp ne i32 %m, 0
+  ret i1 %c
+}
+
+define i32 @length12(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length12:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $12
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length12:
+; X64:       # BB#0:
+; X64-NEXT:    movl $12, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
+  ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(i8* %X, i8* %Y) nounwind {
 ; X32-LABEL: length16:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
@@ -174,11 +371,30 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
 ; X32-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X32-NEXT:    calll memcmp
 ; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length16:
+; X64:       # BB#0:
+; X64-NEXT:    movl $16, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
+  ret i32 %m
+}
+
+define i1 @length16_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length16_eq:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $16
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
 ; X32-NEXT:    testl %eax, %eax
 ; X32-NEXT:    setne %al
 ; X32-NEXT:    retl
 ;
-; SSE2-LABEL: length16:
+; SSE2-LABEL: length16_eq:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movdqu (%rsi), %xmm0
 ; SSE2-NEXT:    movdqu (%rdi), %xmm1
@@ -188,7 +404,7 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
 ; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
-; AVX2-LABEL: length16:
+; AVX2-LABEL: length16_eq:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX2-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
@@ -201,8 +417,8 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
   ret i1 %cmp
 }
 
-define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length16_const:
+define i1 @length16_eq_const(i8* %X) nounwind {
+; X32-LABEL: length16_eq_const:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
 ; X32-NEXT:    pushl $16
@@ -214,7 +430,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    retl
 ;
-; SSE2-LABEL: length16_const:
+; SSE2-LABEL: length16_eq_const:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movdqu (%rdi), %xmm0
 ; SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
@@ -223,7 +439,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; AVX2-LABEL: length16_const:
+; AVX2-LABEL: length16_eq_const:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
 ; AVX2-NEXT:    vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
@@ -236,7 +452,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length32(i8* %x, i8* %y) nounwind {
+define i32 @length32(i8* %X, i8* %Y) nounwind {
 ; X32-LABEL: length32:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
@@ -245,11 +461,32 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
 ; X32-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X32-NEXT:    calll memcmp
 ; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length32:
+; X64:       # BB#0:
+; X64-NEXT:    movl $32, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
+  ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length32_eq:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $32
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
 ; X32-NEXT:    testl %eax, %eax
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    retl
 ;
-; SSE2-LABEL: length32:
+; SSE2-LABEL: length32_eq:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    pushq %rax
 ; SSE2-NEXT:    movl $32, %edx
@@ -259,7 +496,7 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
 ; SSE2-NEXT:    popq %rcx
 ; SSE2-NEXT:    retq
 ;
-; AVX2-LABEL: length32:
+; AVX2-LABEL: length32_eq:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX2-NEXT:    vpcmpeqb (%rsi), %ymm0, %ymm0
@@ -273,8 +510,8 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
   ret i1 %cmp
 }
 
-define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length32_const:
+define i1 @length32_eq_const(i8* %X) nounwind {
+; X32-LABEL: length32_eq_const:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
 ; X32-NEXT:    pushl $32
@@ -286,7 +523,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
 ; X32-NEXT:    setne %al
 ; X32-NEXT:    retl
 ;
-; SSE2-LABEL: length32_const:
+; SSE2-LABEL: length32_eq_const:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    pushq %rax
 ; SSE2-NEXT:    movl $.L.str, %esi
@@ -297,7 +534,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
 ; SSE2-NEXT:    popq %rcx
 ; SSE2-NEXT:    retq
 ;
-; AVX2-LABEL: length32_const:
+; AVX2-LABEL: length32_eq_const:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovdqu (%rdi), %ymm0
 ; AVX2-NEXT:    vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
@@ -311,7 +548,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
   ret i1 %c
 }
 
-define i1 @length64(i8* %x, i8* %y) nounwind {
+define i32 @length64(i8* %X, i8* %Y) nounwind {
 ; X32-LABEL: length64:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
@@ -320,11 +557,30 @@ define i1 @length64(i8* %x, i8* %y) nounwind {
 ; X32-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X32-NEXT:    calll memcmp
 ; X32-NEXT:    addl $16, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: length64:
+; X64:       # BB#0:
+; X64-NEXT:    movl $64, %edx
+; X64-NEXT:    jmp memcmp # TAILCALL
+  %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
+  ret i32 %m
+}
+
+define i1 @length64_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length64_eq:
+; X32:       # BB#0:
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $64
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll memcmp
+; X32-NEXT:    addl $16, %esp
 ; X32-NEXT:    testl %eax, %eax
 ; X32-NEXT:    setne %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length64:
+; X64-LABEL: length64_eq:
 ; X64:       # BB#0:
 ; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movl $64, %edx
@@ -338,8 +594,8 @@ define i1 @length64(i8* %x, i8* %y) nounwind {
   ret i1 %cmp
 }
 
-define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length64_const:
+define i1 @length64_eq_const(i8* %X) nounwind {
+; X32-LABEL: length64_eq_const:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl $0
 ; X32-NEXT:    pushl $64
@@ -351,7 +607,7 @@ define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind {
 ; X32-NEXT:    sete %al
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: length64_const:
+; X64-LABEL: length64_eq_const:
 ; X64:       # BB#0:
 ; X64-NEXT:    pushq %rax
 ; X64-NEXT:    movl $.L.str, %esi
diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 71417694b0d4..1d5829407b71 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -269,10 +269,8 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s
 ; SSE2-LABEL: merge_4f32_f32_012u:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_012u:
@@ -290,11 +288,11 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s
 ; X32-SSE1-LABEL: merge_4f32_f32_012u:
 ; X32-SSE1:       # BB#0:
 ; X32-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-SSE1-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: merge_4f32_f32_012u:
@@ -320,10 +318,8 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s
 ; SSE2-LABEL: merge_4f32_f32_019u:
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_019u:
@@ -341,11 +337,11 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s
 ; X32-SSE1-LABEL: merge_4f32_f32_019u:
 ; X32-SSE1:       # BB#0:
 ; X32-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-SSE1-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: merge_4f32_f32_019u:
@@ -1037,13 +1033,11 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
 define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp {
 ; SSE2-LABEL: merge_4f32_f32_2345_volatile:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: merge_4f32_f32_2345_volatile:
@@ -1065,13 +1059,13 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n
 ; X32-SSE1-LABEL: merge_4f32_f32_2345_volatile:
 ; X32-SSE1:       # BB#0:
 ; X32-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X32-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-SSE1-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: merge_4f32_f32_2345_volatile:
diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll
index e3e2737cf3e6..7b39bfe1c484 100644
--- a/test/CodeGen/X86/mul-constant-i16.ll
+++ b/test/CodeGen/X86/mul-constant-i16.ll
@@ -188,13 +188,16 @@ define i16 @test_mul_by_11(i16 %x) {
 ; X86-LABEL: test_mul_by_11:
 ; X86:       # BB#0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $11, %eax, %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_11:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $11, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,4), %eax
+; X64-NEXT:    leal (%rdi,%rax,2), %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 11
@@ -225,13 +228,16 @@ define i16 @test_mul_by_13(i16 %x) {
 ; X86-LABEL: test_mul_by_13:
 ; X86:       # BB#0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $13, %eax, %eax
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_13:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $13, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,2), %eax
+; X64-NEXT:    leal (%rdi,%rax,4), %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 13
@@ -241,14 +247,19 @@ define i16 @test_mul_by_13(i16 %x) {
 define i16 @test_mul_by_14(i16 %x) {
 ; X86-LABEL: test_mul_by_14:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $14, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %eax
+; X86-NEXT:    leal (%ecx,%eax,4), %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_14:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $14, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,2), %eax
+; X64-NEXT:    leal (%rdi,%rax,4), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 14
@@ -337,14 +348,19 @@ define i16 @test_mul_by_18(i16 %x) {
 define i16 @test_mul_by_19(i16 %x) {
 ; X86-LABEL: test_mul_by_19:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $19, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,4), %eax
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_19:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $19, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,4), %eax
+; X64-NEXT:    shll $2, %eax
+; X64-NEXT:    subl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 19
@@ -375,13 +391,16 @@ define i16 @test_mul_by_21(i16 %x) {
 ; X86-LABEL: test_mul_by_21:
 ; X86:       # BB#0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $21, %eax, %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_21:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $21, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,4), %eax
+; X64-NEXT:    leal (%rdi,%rax,4), %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 21
@@ -391,14 +410,19 @@ define i16 @test_mul_by_21(i16 %x) {
 define i16 @test_mul_by_22(i16 %x) {
 ; X86-LABEL: test_mul_by_22:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $22, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,4), %eax
+; X86-NEXT:    leal (%ecx,%eax,4), %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_22:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $22, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,4), %eax
+; X64-NEXT:    leal (%rdi,%rax,4), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 22
@@ -408,14 +432,19 @@ define i16 @test_mul_by_22(i16 %x) {
 define i16 @test_mul_by_23(i16 %x) {
 ; X86-LABEL: test_mul_by_23:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $23, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %eax
+; X86-NEXT:    shll $3, %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_23:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $23, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,2), %eax
+; X64-NEXT:    shll $3, %eax
+; X64-NEXT:    subl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 23
@@ -465,14 +494,19 @@ define i16 @test_mul_by_25(i16 %x) {
 define i16 @test_mul_by_26(i16 %x) {
 ; X86-LABEL: test_mul_by_26:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $26, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_26:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $26, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,8), %eax
+; X64-NEXT:    leal (%rax,%rax,2), %eax
+; X64-NEXT:    subl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 26
@@ -502,14 +536,19 @@ define i16 @test_mul_by_27(i16 %x) {
 define i16 @test_mul_by_28(i16 %x) {
 ; X86-LABEL: test_mul_by_28:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $28, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_28:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $28, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,8), %eax
+; X64-NEXT:    leal (%rax,%rax,2), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 28
@@ -519,14 +558,21 @@ define i16 @test_mul_by_28(i16 %x) {
 define i16 @test_mul_by_29(i16 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $29, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_29:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $29, %edi, %eax
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal (%rdi,%rdi,8), %eax
+; X64-NEXT:    leal (%rax,%rax,2), %eax
+; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 29
@@ -536,14 +582,20 @@ define i16 @test_mul_by_29(i16 %x) {
 define i16 @test_mul_by_30(i16 %x) {
 ; X86-LABEL: test_mul_by_30:
 ; X86:       # BB#0:
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    imull $30, %eax, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    shll $5, %eax
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mul_by_30:
 ; X64:       # BB#0:
-; X64-NEXT:    imull $30, %edi, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    shll $5, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    subl %edi, %eax
 ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 30
@@ -587,3 +639,30 @@ define i16 @test_mul_by_32(i16 %x) {
   %mul = mul nsw i16 %x, 32
   ret i16 %mul
 }
+
+; (x*9+42)*(x*5+2)
+define i16 @test_mul_spec(i16 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86:       # BB#0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal 42(%eax,%eax,8), %ecx
+; X86-NEXT:    leal 2(%eax,%eax,4), %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mul_spec:
+; X64:       # BB#0:
+; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT:    leal 42(%rdi,%rdi,8), %ecx
+; X64-NEXT:    leal 2(%rdi,%rdi,4), %eax
+; X64-NEXT:    imull %ecx, %eax
+; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT:    retq
+  %mul = mul nsw i16 %x, 9
+  %add = add nsw i16 %mul, 42
+  %mul2 = mul nsw i16 %x, 5
+  %add2 = add nsw i16 %mul2, 2
+  %mul3 = mul nsw i16 %add, %add2
+  ret i16 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll
index 76e46e1f1b09..d545b477e102 100644
--- a/test/CodeGen/X86/mul-constant-i32.ll
+++ b/test/CodeGen/X86/mul-constant-i32.ll
@@ -1,6 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
 
 define i32 @test_mul_by_1(i32 %x) {
 ; X86-LABEL: test_mul_by_1:
@@ -8,10 +14,40 @@ define i32 @test_mul_by_1(i32 %x) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_1:
-; X64:       # BB#0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 1
   ret i32 %mul
 }
@@ -23,11 +59,47 @@ define i32 @test_mul_by_2(i32 %x) {
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_2:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (%rdi,%rdi), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    addl %eax, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT:    leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 2
   ret i32 %mul
 }
@@ -38,11 +110,46 @@ define i32 @test_mul_by_3(i32 %x) {
 ; X86-NEXT:    imull $3, {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_3:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (%rdi,%rdi,2), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $3, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 3
   ret i32 %mul
 }
@@ -54,11 +161,47 @@ define i32 @test_mul_by_4(i32 %x) {
 ; X86-NEXT:    shll $2, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_4:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (,%rdi,4), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    shll $2, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT:    leal (,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT:    leal (,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT:    leal (,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 4
   ret i32 %mul
 }
@@ -69,11 +212,46 @@ define i32 @test_mul_by_5(i32 %x) {
 ; X86-NEXT:    imull $5, {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_5:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (%rdi,%rdi,4), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $5, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 5
   ret i32 %mul
 }
@@ -86,12 +264,46 @@ define i32 @test_mul_by_6(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_6:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    addl %edi, %edi
-; X64-NEXT:    leal (%rdi,%rdi,2), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $6, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $6, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $6, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $6, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 6
   ret i32 %mul
 }
@@ -104,12 +316,46 @@ define i32 @test_mul_by_7(i32 %x) {
 ; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_7:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (,%rdi,8), %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $7, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $7, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $7, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $7, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 7
   ret i32 %mul
 }
@@ -121,11 +367,47 @@ define i32 @test_mul_by_8(i32 %x) {
 ; X86-NEXT:    shll $3, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_8:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (,%rdi,8), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    shll $3, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT:    leal (,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT:    leal (,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 8
   ret i32 %mul
 }
@@ -136,11 +418,46 @@ define i32 @test_mul_by_9(i32 %x) {
 ; X86-NEXT:    imull $9, {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_9:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (%rdi,%rdi,8), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $9, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 9
   ret i32 %mul
 }
@@ -153,12 +470,46 @@ define i32 @test_mul_by_10(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_10:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    addl %edi, %edi
-; X64-NEXT:    leal (%rdi,%rdi,4), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $10, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $10, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $10, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $10, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 10
   ret i32 %mul
 }
@@ -166,13 +517,49 @@ define i32 @test_mul_by_10(i32 %x) {
 define i32 @test_mul_by_11(i32 %x) {
 ; X86-LABEL: test_mul_by_11:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_11:
-; X64:       # BB#0:
-; X64-NEXT:    imull $11, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $11, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $11, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $11, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $11, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 11
   ret i32 %mul
 }
@@ -185,12 +572,46 @@ define i32 @test_mul_by_12(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_12:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    shll $2, %edi
-; X64-NEXT:    leal (%rdi,%rdi,2), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $12, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $12, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $12, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $12, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 12
   ret i32 %mul
 }
@@ -198,13 +619,49 @@ define i32 @test_mul_by_12(i32 %x) {
 define i32 @test_mul_by_13(i32 %x) {
 ; X86-LABEL: test_mul_by_13:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_13:
-; X64:       # BB#0:
-; X64-NEXT:    imull $13, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $13, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $13, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $13, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $13, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 13
   ret i32 %mul
 }
@@ -212,13 +669,52 @@ define i32 @test_mul_by_13(i32 %x) {
 define i32 @test_mul_by_14(i32 %x) {
 ; X86-LABEL: test_mul_by_14:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %eax
+; X86-NEXT:    leal (%ecx,%eax,4), %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_14:
-; X64:       # BB#0:
-; X64-NEXT:    imull $14, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $14, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $14, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $14, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $14, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 14
   ret i32 %mul
 }
@@ -231,12 +727,46 @@ define i32 @test_mul_by_15(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_15:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (%rdi,%rdi,4), %eax
-; X64-NEXT:    leal (%rax,%rax,2), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $15, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $15, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $15, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $15, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 15
   ret i32 %mul
 }
@@ -248,11 +778,47 @@ define i32 @test_mul_by_16(i32 %x) {
 ; X86-NEXT:    shll $4, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_16:
-; X64:       # BB#0:
-; X64-NEXT:    shll $4, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    shll $4, %edi # sched: [1:0.50]
+; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    shll $4, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    shll $4, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    shll $4, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    shll $4, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    shll $4, %edi # sched: [1:1.00]
+; X64-SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    shll $4, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 16
   ret i32 %mul
 }
@@ -266,13 +832,49 @@ define i32 @test_mul_by_17(i32 %x) {
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_17:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    shll $4, %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT:    shll $4, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $17, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $17, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $17, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    shll $4, %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    leal (%rax,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $17, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 17
   ret i32 %mul
 }
@@ -285,12 +887,46 @@ define i32 @test_mul_by_18(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,8), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_18:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    addl %edi, %edi
-; X64-NEXT:    leal (%rdi,%rdi,8), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $18, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $18, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $18, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $18, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 18
   ret i32 %mul
 }
@@ -298,13 +934,52 @@ define i32 @test_mul_by_18(i32 %x) {
 define i32 @test_mul_by_19(i32 %x) {
 ; X86-LABEL: test_mul_by_19:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,4), %eax
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_19:
-; X64:       # BB#0:
-; X64-NEXT:    imull $19, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    shll $2, %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    shll $2, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $19, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $19, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $19, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $19, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 19
   ret i32 %mul
 }
@@ -317,12 +992,46 @@ define i32 @test_mul_by_20(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_20:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    shll $2, %edi
-; X64-NEXT:    leal (%rdi,%rdi,4), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $20, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $20, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $20, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $20, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 20
   ret i32 %mul
 }
@@ -330,13 +1039,49 @@ define i32 @test_mul_by_20(i32 %x) {
 define i32 @test_mul_by_21(i32 %x) {
 ; X86-LABEL: test_mul_by_21:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_21:
-; X64:       # BB#0:
-; X64-NEXT:    imull $21, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $21, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $21, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $21, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $21, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 21
   ret i32 %mul
 }
@@ -344,13 +1089,52 @@ define i32 @test_mul_by_21(i32 %x) {
 define i32 @test_mul_by_22(i32 %x) {
 ; X86-LABEL: test_mul_by_22:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,4), %eax
+; X86-NEXT:    leal (%ecx,%eax,4), %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_22:
-; X64:       # BB#0:
-; X64-NEXT:    imull $22, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $22, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $22, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $22, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $22, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 22
   ret i32 %mul
 }
@@ -358,13 +1142,52 @@ define i32 @test_mul_by_22(i32 %x) {
 define i32 @test_mul_by_23(i32 %x) {
 ; X86-LABEL: test_mul_by_23:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %eax
+; X86-NEXT:    shll $3, %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_23:
-; X64:       # BB#0:
-; X64-NEXT:    imull $23, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    shll $3, %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    shll $3, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $23, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $23, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $23, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $23, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 23
   ret i32 %mul
 }
@@ -377,12 +1200,46 @@ define i32 @test_mul_by_24(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_24:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    shll $3, %edi
-; X64-NEXT:    leal (%rdi,%rdi,2), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    shll $3, %edi # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    shll $3, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $24, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $24, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $24, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    shll $3, %edi # sched: [1:1.00]
+; X64-SLM-NEXT:    leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $24, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 24
   ret i32 %mul
 }
@@ -395,12 +1252,46 @@ define i32 @test_mul_by_25(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_25:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (%rdi,%rdi,4), %eax
-; X64-NEXT:    leal (%rax,%rax,4), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $25, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $25, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $25, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    leal (%rax,%rax,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $25, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 25
   ret i32 %mul
 }
@@ -408,13 +1299,52 @@ define i32 @test_mul_by_25(i32 %x) {
 define i32 @test_mul_by_26(i32 %x) {
 ; X86-LABEL: test_mul_by_26:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_26:
-; X64:       # BB#0:
-; X64-NEXT:    imull $26, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $26, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $26, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $26, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $26, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 26
   ret i32 %mul
 }
@@ -427,12 +1357,46 @@ define i32 @test_mul_by_27(i32 %x) {
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_27:
-; X64:       # BB#0:
-; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT:    leal (%rdi,%rdi,8), %eax
-; X64-NEXT:    leal (%rax,%rax,2), %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $27, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $27, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $27, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $27, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 27
   ret i32 %mul
 }
@@ -440,13 +1404,52 @@ define i32 @test_mul_by_27(i32 %x) {
 define i32 @test_mul_by_28(i32 %x) {
 ; X86-LABEL: test_mul_by_28:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_28:
-; X64:       # BB#0:
-; X64-NEXT:    imull $28, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $28, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $28, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $28, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $28, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 28
   ret i32 %mul
 }
@@ -454,13 +1457,55 @@ define i32 @test_mul_by_28(i32 %x) {
 define i32 @test_mul_by_29(i32 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_29:
-; X64:       # BB#0:
-; X64-NEXT:    imull $29, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $29, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $29, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $29, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $29, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 29
   ret i32 %mul
 }
@@ -468,13 +1513,53 @@ define i32 @test_mul_by_29(i32 %x) {
 define i32 @test_mul_by_30(i32 %x) {
 ; X86-LABEL: test_mul_by_30:
 ; X86:       # BB#0:
-; X86-NEXT:    imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    shll $5, %eax
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_30:
-; X64:       # BB#0:
-; X64-NEXT:    imull $30, %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT:    shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $30, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $30, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imull $30, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $30, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 30
   ret i32 %mul
 }
@@ -488,12 +1573,46 @@ define i32 @test_mul_by_31(i32 %x) {
 ; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_31:
-; X64:       # BB#0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shll $5, %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT:    shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    imull $31, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imull $31, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imull $31, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    shll $5, %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imull $31, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 31
   ret i32 %mul
 }
@@ -505,11 +1624,124 @@ define i32 @test_mul_by_32(i32 %x) {
 ; X86-NEXT:    shll $5, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_32:
-; X64:       # BB#0:
-; X64-NEXT:    shll $5, %edi
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    shll $5, %edi # sched: [1:0.50]
+; X64-HSW-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    shll $5, %edi # sched: [1:0.50]
+; X64-JAG-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    shll $5, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    shll $5, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    shll $5, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    shll $5, %edi # sched: [1:1.00]
+; X64-SLM-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    shll $5, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i32 %x, 32
   ret i32 %mul
 }
+
+; (x*9+42)*(x*5+2)
+define i32 @test_mul_spec(i32 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal 42(%eax,%eax,8), %ecx
+; X86-NEXT:    leal 2(%eax,%eax,4), %eax
+; X86-NEXT:    imull %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-HSW-NEXT:    addl $42, %ecx # sched: [1:0.25]
+; X64-HSW-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT:    addl $2, %eax # sched: [1:0.25]
+; X64-HSW-NEXT:    imull %ecx, %eax # sched: [4:1.00]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT:    leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-JAG-NEXT:    leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT:    imull %ecx, %eax # sched: [3:1.00]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    leal 42(%eax,%eax,8), %ecx
+; X86-NOOPT-NEXT:    leal 2(%eax,%eax,4), %eax
+; X86-NOOPT-NEXT:    imull %ecx, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    addl $42, %ecx # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    addl $2, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    imull %ecx, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT:    leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    imull %ecx, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT:    leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; X64-SLM-NEXT:    leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT:    imull %ecx, %eax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT:    leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    imull %ecx, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
+  %mul = mul nsw i32 %x, 9
+  %add = add nsw i32 %mul, 42
+  %mul2 = mul nsw i32 %x, 5
+  %add2 = add nsw i32 %mul2, 2
+  %mul3 = mul nsw i32 %add, %add2
+  ret i32 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll
index 8579179a8231..ea841c761c7b 100644
--- a/test/CodeGen/X86/mul-constant-i64.ll
+++ b/test/CodeGen/X86/mul-constant-i64.ll
@@ -1,18 +1,55 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
 
-define i64 @test_mul_by_1(i64 %x) {
+define i64 @test_mul_by_1(i64 %x) nounwind {
 ; X86-LABEL: test_mul_by_1:
 ; X86:       # BB#0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_1:
-; X64:       # BB#0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 1
   ret i64 %mul
 }
@@ -26,10 +63,43 @@ define i64 @test_mul_by_2(i64 %x) {
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_2:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (%rdi,%rdi), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT:    shldl $1, %eax, %edx
+; X86-NOOPT-NEXT:    addl %eax, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 2
   ret i64 %mul
 }
@@ -43,10 +113,43 @@ define i64 @test_mul_by_3(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_3:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (%rdi,%rdi,2), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $3, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $3, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 3
   ret i64 %mul
 }
@@ -60,10 +163,43 @@ define i64 @test_mul_by_4(i64 %x) {
 ; X86-NEXT:    shll $2, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_4:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (,%rdi,4), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT:    shldl $2, %eax, %edx
+; X86-NOOPT-NEXT:    shll $2, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    leaq (,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    leaq (,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    leaq (,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 4
   ret i64 %mul
 }
@@ -77,10 +213,43 @@ define i64 @test_mul_by_5(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_5:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (%rdi,%rdi,4), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $5, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $5, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 5
   ret i64 %mul
 }
@@ -95,11 +264,46 @@ define i64 @test_mul_by_6(i64 %x) {
 ; X86-NEXT:    leal (%edx,%ecx,2), %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_6:
-; X64:       # BB#0:
-; X64-NEXT:    addq %rdi, %rdi
-; X64-NEXT:    leaq (%rdi,%rdi,2), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $6, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $6, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $6, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $6, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $6, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 6
   ret i64 %mul
 }
@@ -115,11 +319,46 @@ define i64 @test_mul_by_7(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_7:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (,%rdi,8), %rax
-; X64-NEXT:    subq %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $7, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $7, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $7, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $7, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $7, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 7
   ret i64 %mul
 }
@@ -133,10 +372,43 @@ define i64 @test_mul_by_8(i64 %x) {
 ; X86-NEXT:    shll $3, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_8:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (,%rdi,8), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT:    shldl $3, %eax, %edx
+; X86-NOOPT-NEXT:    shll $3, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    leaq (,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    leaq (,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 8
   ret i64 %mul
 }
@@ -150,10 +422,43 @@ define i64 @test_mul_by_9(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_9:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (%rdi,%rdi,8), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $9, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $9, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 9
   ret i64 %mul
 }
@@ -168,11 +473,46 @@ define i64 @test_mul_by_10(i64 %x) {
 ; X86-NEXT:    leal (%edx,%ecx,2), %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_10:
-; X64:       # BB#0:
-; X64-NEXT:    addq %rdi, %rdi
-; X64-NEXT:    leaq (%rdi,%rdi,4), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $10, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $10, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $10, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $10, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $10, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 10
   ret i64 %mul
 }
@@ -180,16 +520,53 @@ define i64 @test_mul_by_10(i64 %x) {
 define i64 @test_mul_by_11(i64 %x) {
 ; X86-LABEL: test_mul_by_11:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,2), %ecx
 ; X86-NEXT:    movl $11, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $11, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_11:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $11, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $11, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $11, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $11, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $11, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $11, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $11, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 11
   ret i64 %mul
 }
@@ -204,11 +581,46 @@ define i64 @test_mul_by_12(i64 %x) {
 ; X86-NEXT:    leal (%edx,%ecx,4), %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_12:
-; X64:       # BB#0:
-; X64-NEXT:    shlq $2, %rdi
-; X64-NEXT:    leaq (%rdi,%rdi,2), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $12, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $12, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $12, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $12, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $12, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 12
   ret i64 %mul
 }
@@ -216,16 +628,53 @@ define i64 @test_mul_by_12(i64 %x) {
 define i64 @test_mul_by_13(i64 %x) {
 ; X86-LABEL: test_mul_by_13:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %ecx
 ; X86-NEXT:    movl $13, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $13, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_13:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $13, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $13, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $13, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $13, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $13, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $13, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $13, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 13
   ret i64 %mul
 }
@@ -233,16 +682,56 @@ define i64 @test_mul_by_13(i64 %x) {
 define i64 @test_mul_by_14(i64 %x) {
 ; X86-LABEL: test_mul_by_14:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %ecx
+; X86-NEXT:    addl %eax, %ecx
 ; X86-NEXT:    movl $14, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $14, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_14:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $14, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $14, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $14, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $14, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $14, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $14, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $14, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 14
   ret i64 %mul
 }
@@ -258,11 +747,46 @@ define i64 @test_mul_by_15(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_15:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (%rdi,%rdi,4), %rax
-; X64-NEXT:    leaq (%rax,%rax,2), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $15, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $15, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $15, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $15, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $15, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 15
   ret i64 %mul
 }
@@ -276,11 +800,49 @@ define i64 @test_mul_by_16(i64 %x) {
 ; X86-NEXT:    shll $4, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_16:
-; X64:       # BB#0:
-; X64-NEXT:    shlq $4, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    shlq $4, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    shlq $4, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT:    shldl $4, %eax, %edx
+; X86-NOOPT-NEXT:    shll $4, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    shlq $4, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    shlq $4, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    shlq $4, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    shlq $4, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 16
   ret i64 %mul
 }
@@ -297,12 +859,49 @@ define i64 @test_mul_by_17(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_17:
-; X64:       # BB#0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlq $4, %rax
-; X64-NEXT:    leaq (%rax,%rdi), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    shlq $4, %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT:    shlq $4, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $17, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $17, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $17, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $17, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    shlq $4, %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $17, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 17
   ret i64 %mul
 }
@@ -317,11 +916,46 @@ define i64 @test_mul_by_18(i64 %x) {
 ; X86-NEXT:    leal (%edx,%ecx,2), %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_18:
-; X64:       # BB#0:
-; X64-NEXT:    addq %rdi, %rdi
-; X64-NEXT:    leaq (%rdi,%rdi,8), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $18, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $18, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $18, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $18, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $18, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 18
   ret i64 %mul
 }
@@ -329,16 +963,56 @@ define i64 @test_mul_by_18(i64 %x) {
 define i64 @test_mul_by_19(i64 %x) {
 ; X86-LABEL: test_mul_by_19:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    shll $2, %ecx
+; X86-NEXT:    subl %eax, %ecx
 ; X86-NEXT:    movl $19, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $19, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_19:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $19, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    shlq $2, %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    shlq $2, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $19, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $19, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $19, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $19, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $19, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $19, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 19
   ret i64 %mul
 }
@@ -353,11 +1027,46 @@ define i64 @test_mul_by_20(i64 %x) {
 ; X86-NEXT:    leal (%edx,%ecx,4), %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_20:
-; X64:       # BB#0:
-; X64-NEXT:    shlq $2, %rdi
-; X64-NEXT:    leaq (%rdi,%rdi,4), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $20, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $20, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $20, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $20, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $20, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 20
   ret i64 %mul
 }
@@ -365,16 +1074,53 @@ define i64 @test_mul_by_20(i64 %x) {
 define i64 @test_mul_by_21(i64 %x) {
 ; X86-LABEL: test_mul_by_21:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %ecx
 ; X86-NEXT:    movl $21, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $21, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_21:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $21, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $21, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $21, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $21, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $21, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $21, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $21, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 21
   ret i64 %mul
 }
@@ -382,16 +1128,56 @@ define i64 @test_mul_by_21(i64 %x) {
 define i64 @test_mul_by_22(i64 %x) {
 ; X86-LABEL: test_mul_by_22:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %ecx
+; X86-NEXT:    addl %eax, %ecx
 ; X86-NEXT:    movl $22, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $22, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_22:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $22, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $22, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $22, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $22, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $22, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $22, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $22, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 22
   ret i64 %mul
 }
@@ -399,16 +1185,56 @@ define i64 @test_mul_by_22(i64 %x) {
 define i64 @test_mul_by_23(i64 %x) {
 ; X86-LABEL: test_mul_by_23:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    shll $3, %ecx
+; X86-NEXT:    subl %eax, %ecx
 ; X86-NEXT:    movl $23, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $23, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_23:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $23, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    shlq $3, %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    shlq $3, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $23, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $23, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $23, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $23, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $23, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $23, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 23
   ret i64 %mul
 }
@@ -423,11 +1249,46 @@ define i64 @test_mul_by_24(i64 %x) {
 ; X86-NEXT:    leal (%edx,%ecx,8), %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_24:
-; X64:       # BB#0:
-; X64-NEXT:    shlq $3, %rdi
-; X64-NEXT:    leaq (%rdi,%rdi,2), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    shlq $3, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    shlq $3, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $24, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $24, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $24, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $24, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    shlq $3, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $24, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 24
   ret i64 %mul
 }
@@ -443,11 +1304,46 @@ define i64 @test_mul_by_25(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_25:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (%rdi,%rdi,4), %rax
-; X64-NEXT:    leaq (%rax,%rax,4), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $25, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $25, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $25, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $25, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    leaq (%rax,%rax,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $25, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 25
   ret i64 %mul
 }
@@ -455,16 +1351,56 @@ define i64 @test_mul_by_25(i64 %x) {
 define i64 @test_mul_by_26(i64 %x) {
 ; X86-LABEL: test_mul_by_26:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    subl %eax, %ecx
 ; X86-NEXT:    movl $26, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $26, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_26:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $26, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $26, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $26, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $26, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $26, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $26, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $26, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 26
   ret i64 %mul
 }
@@ -480,11 +1416,46 @@ define i64 @test_mul_by_27(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_27:
-; X64:       # BB#0:
-; X64-NEXT:    leaq (%rdi,%rdi,8), %rax
-; X64-NEXT:    leaq (%rax,%rax,2), %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $27, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $27, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $27, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $27, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $27, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 27
   ret i64 %mul
 }
@@ -492,16 +1463,56 @@ define i64 @test_mul_by_27(i64 %x) {
 define i64 @test_mul_by_28(i64 %x) {
 ; X86-LABEL: test_mul_by_28:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    addl %eax, %ecx
 ; X86-NEXT:    movl $28, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $28, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_28:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $28, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $28, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $28, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $28, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $28, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $28, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $28, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 28
   ret i64 %mul
 }
@@ -509,16 +1520,59 @@ define i64 @test_mul_by_28(i64 %x) {
 define i64 @test_mul_by_29(i64 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    addl %eax, %ecx
 ; X86-NEXT:    movl $29, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $29, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_29:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $29, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $29, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $29, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $29, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $29, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $29, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $29, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 29
   ret i64 %mul
 }
@@ -526,16 +1580,59 @@ define i64 @test_mul_by_29(i64 %x) {
 define i64 @test_mul_by_30(i64 %x) {
 ; X86-LABEL: test_mul_by_30:
 ; X86:       # BB#0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $5, %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    subl %eax, %ecx
 ; X86-NEXT:    movl $30, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    imull $30, {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_30:
-; X64:       # BB#0:
-; X64-NEXT:    imulq $30, %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT:    shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $30, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $30, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $30, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $30, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    imulq $30, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $30, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 30
   ret i64 %mul
 }
@@ -552,12 +1649,49 @@ define i64 @test_mul_by_31(i64 %x) {
 ; X86-NEXT:    addl %ecx, %edx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_31:
-; X64:       # BB#0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    shlq $5, %rax
-; X64-NEXT:    subq %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT:    shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl $31, %eax
+; X86-NOOPT-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT:    imull $31, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    imulq $31, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    imulq $31, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    shlq $5, %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    imulq $31, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 31
   ret i64 %mul
 }
@@ -571,11 +1705,168 @@ define i64 @test_mul_by_32(i64 %x) {
 ; X86-NEXT:    shll $5, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: test_mul_by_32:
-; X64:       # BB#0:
-; X64-NEXT:    shlq $5, %rdi
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    shlq $5, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    shlq $5, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT:    shldl $5, %eax, %edx
+; X86-NOOPT-NEXT:    shll $5, %eax
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    shlq $5, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    shlq $5, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    shlq $5, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    shlq $5, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
   %mul = mul nsw i64 %x, 32
   ret i64 %mul
 }
+
+; (x*9+42)*(x*5+2)
+define i64 @test_mul_spec(i64 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86:       # BB#0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl $9, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    leal (%edi,%edi,8), %ebx
+; X86-NEXT:    addl $42, %esi
+; X86-NEXT:    adcl %edx, %ebx
+; X86-NEXT:    movl $5, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    leal (%edi,%edi,4), %edi
+; X86-NEXT:    addl $2, %ecx
+; X86-NEXT:    adcl %edx, %edi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    imull %esi, %edi
+; X86-NEXT:    addl %edi, %edx
+; X86-NEXT:    imull %ebx, %ecx
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-HSW-NEXT:    addq $42, %rcx # sched: [1:0.25]
+; X64-HSW-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT:    addq $2, %rax # sched: [1:0.25]
+; X64-HSW-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
+; X64-HSW-NEXT:    retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG:       # BB#0:
+; X64-JAG-NEXT:    leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-JAG-NEXT:    leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
+; X64-JAG-NEXT:    retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT:       # BB#0:
+; X86-NOOPT-NEXT:    pushl %ebx
+; X86-NOOPT-NEXT:    pushl %edi
+; X86-NOOPT-NEXT:    pushl %esi
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOOPT-NEXT:    movl $9, %edx
+; X86-NOOPT-NEXT:    movl %ecx, %eax
+; X86-NOOPT-NEXT:    mull %edx
+; X86-NOOPT-NEXT:    movl %eax, %esi
+; X86-NOOPT-NEXT:    leal (%edi,%edi,8), %ebx
+; X86-NOOPT-NEXT:    addl $42, %esi
+; X86-NOOPT-NEXT:    adcl %edx, %ebx
+; X86-NOOPT-NEXT:    movl $5, %edx
+; X86-NOOPT-NEXT:    movl %ecx, %eax
+; X86-NOOPT-NEXT:    mull %edx
+; X86-NOOPT-NEXT:    movl %eax, %ecx
+; X86-NOOPT-NEXT:    leal (%edi,%edi,4), %edi
+; X86-NOOPT-NEXT:    addl $2, %ecx
+; X86-NOOPT-NEXT:    adcl %edx, %edi
+; X86-NOOPT-NEXT:    movl %esi, %eax
+; X86-NOOPT-NEXT:    mull %ecx
+; X86-NOOPT-NEXT:    imull %esi, %edi
+; X86-NOOPT-NEXT:    addl %edi, %edx
+; X86-NOOPT-NEXT:    imull %ebx, %ecx
+; X86-NOOPT-NEXT:    addl %ecx, %edx
+; X86-NOOPT-NEXT:    popl %esi
+; X86-NOOPT-NEXT:    popl %edi
+; X86-NOOPT-NEXT:    popl %ebx
+; X86-NOOPT-NEXT:    retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT:       # BB#0:
+; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    addq $42, %rcx # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT:    addq $2, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT:    retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT:       # BB#0:
+; JAG-NOOPT-NEXT:    leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT:    retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM:       # BB#0:
+; X64-SLM-NEXT:    leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; X64-SLM-NEXT:    leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
+; X64-SLM-NEXT:    retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT:       # BB#0:
+; SLM-NOOPT-NEXT:    leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT:    imulq %rcx, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT:    retq # sched: [4:1.00]
+  %mul = mul nsw i64 %x, 9
+  %add = add nsw i64 %mul, 42
+  %mul2 = mul nsw i64 %x, 5
+  %add2 = add nsw i64 %mul2, 2
+  %mul3 = mul nsw i64 %add, %add2
+  ret i64 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-result.ll b/test/CodeGen/X86/mul-constant-result.ll
new file mode 100644
index 000000000000..65d80a699e24
--- /dev/null
+++ b/test/CodeGen/X86/mul-constant-result.ll
@@ -0,0 +1,1291 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @mult(i32, i32) local_unnamed_addr #0 {
+; X86-LABEL: mult:
+; X86:       # BB#0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:  .Lcfi0:
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:  .Lcfi1:
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpl $1, %edx
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    movl $1, %esi
+; X86-NEXT:    jg .LBB0_2
+; X86-NEXT:  # BB#1:
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:  .LBB0_2:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB0_4
+; X86-NEXT:  # BB#3:
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB0_4:
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    cmpl $31, %ecx
+; X86-NEXT:    ja .LBB0_39
+; X86-NEXT:  # BB#5:
+; X86-NEXT:    jmpl *.LJTI0_0(,%ecx,4)
+; X86-NEXT:  .LBB0_6:
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_39:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB0_40:
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_7:
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_8:
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_9:
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_10:
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_11:
+; X86-NEXT:    leal (,%eax,8), %ecx
+; X86-NEXT:    jmp .LBB0_12
+; X86-NEXT:  .LBB0_13:
+; X86-NEXT:    shll $3, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_14:
+; X86-NEXT:    leal (%eax,%eax,8), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_15:
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_16:
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_17:
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_18:
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_19:
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    jmp .LBB0_20
+; X86-NEXT:  .LBB0_21:
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_22:
+; X86-NEXT:    shll $4, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_23:
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $4, %ecx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_24:
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    leal (%eax,%eax,8), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_25:
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    shll $2, %ecx
+; X86-NEXT:    jmp .LBB0_12
+; X86-NEXT:  .LBB0_26:
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_27:
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_28:
+; X86-NEXT:    leal (%eax,%eax,4), %ecx
+; X86-NEXT:  .LBB0_20:
+; X86-NEXT:    leal (%eax,%ecx,4), %ecx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_29:
+; X86-NEXT:    leal (%eax,%eax,2), %ecx
+; X86-NEXT:    shll $3, %ecx
+; X86-NEXT:    jmp .LBB0_12
+; X86-NEXT:  .LBB0_30:
+; X86-NEXT:    shll $3, %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_31:
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_32:
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    jmp .LBB0_12
+; X86-NEXT:  .LBB0_33:
+; X86-NEXT:    leal (%eax,%eax,8), %eax
+; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_34:
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_35:
+; X86-NEXT:    leal (%eax,%eax,8), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    addl %eax, %ecx
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_36:
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $5, %ecx
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    jmp .LBB0_12
+; X86-NEXT:  .LBB0_37:
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll $5, %ecx
+; X86-NEXT:  .LBB0_12:
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB0_38:
+; X86-NEXT:    shll $5, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-HSW-LABEL: mult:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT:    cmpl $1, %esi
+; X64-HSW-NEXT:    movl $1, %ecx
+; X64-HSW-NEXT:    movl %esi, %eax
+; X64-HSW-NEXT:    cmovgl %ecx, %eax
+; X64-HSW-NEXT:    testl %esi, %esi
+; X64-HSW-NEXT:    cmovel %ecx, %eax
+; X64-HSW-NEXT:    addl $-1, %edi
+; X64-HSW-NEXT:    cmpl $31, %edi
+; X64-HSW-NEXT:    ja .LBB0_36
+; X64-HSW-NEXT:  # BB#1:
+; X64-HSW-NEXT:    jmpq *.LJTI0_0(,%rdi,8)
+; X64-HSW-NEXT:  .LBB0_2:
+; X64-HSW-NEXT:    addl %eax, %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_36:
+; X64-HSW-NEXT:    xorl %eax, %eax
+; X64-HSW-NEXT:  .LBB0_37:
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_3:
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_4:
+; X64-HSW-NEXT:    shll $2, %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_5:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_6:
+; X64-HSW-NEXT:    addl %eax, %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_7:
+; X64-HSW-NEXT:    leal (,%rax,8), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_8
+; X64-HSW-NEXT:  .LBB0_9:
+; X64-HSW-NEXT:    shll $3, %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_10:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_11:
+; X64-HSW-NEXT:    addl %eax, %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_12:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_13:
+; X64-HSW-NEXT:    shll $2, %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_14:
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT:    leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_15:
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_16
+; X64-HSW-NEXT:  .LBB0_18:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_19:
+; X64-HSW-NEXT:    shll $4, %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_20:
+; X64-HSW-NEXT:    movl %eax, %ecx
+; X64-HSW-NEXT:    shll $4, %ecx
+; X64-HSW-NEXT:    jmp .LBB0_17
+; X64-HSW-NEXT:  .LBB0_21:
+; X64-HSW-NEXT:    addl %eax, %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_22:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT:    shll $2, %ecx
+; X64-HSW-NEXT:    jmp .LBB0_8
+; X64-HSW-NEXT:  .LBB0_23:
+; X64-HSW-NEXT:    shll $2, %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_24:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT:    leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_25:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT:  .LBB0_16:
+; X64-HSW-NEXT:    leal (%rax,%rcx,4), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_17
+; X64-HSW-NEXT:  .LBB0_26:
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT:    shll $3, %ecx
+; X64-HSW-NEXT:    jmp .LBB0_8
+; X64-HSW-NEXT:  .LBB0_27:
+; X64-HSW-NEXT:    shll $3, %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_28:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_29:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_8
+; X64-HSW-NEXT:  .LBB0_30:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_31:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_17
+; X64-HSW-NEXT:  .LBB0_32:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT:    addl %eax, %ecx
+; X64-HSW-NEXT:  .LBB0_17:
+; X64-HSW-NEXT:    addl %eax, %ecx
+; X64-HSW-NEXT:    movl %ecx, %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_33:
+; X64-HSW-NEXT:    movl %eax, %ecx
+; X64-HSW-NEXT:    shll $5, %ecx
+; X64-HSW-NEXT:    subl %eax, %ecx
+; X64-HSW-NEXT:    jmp .LBB0_8
+; X64-HSW-NEXT:  .LBB0_34:
+; X64-HSW-NEXT:    movl %eax, %ecx
+; X64-HSW-NEXT:    shll $5, %ecx
+; X64-HSW-NEXT:  .LBB0_8:
+; X64-HSW-NEXT:    subl %eax, %ecx
+; X64-HSW-NEXT:    movl %ecx, %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_35:
+; X64-HSW-NEXT:    shll $5, %eax
+; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT:    retq
+  %3 = icmp eq i32 %1, 0
+  %4 = icmp sgt i32 %1, 1
+  %5 = or i1 %3, %4
+  %6 = select i1 %5, i32 1, i32 %1
+  switch i32 %0, label %69 [
+    i32 1, label %70
+    i32 2, label %7
+    i32 3, label %9
+    i32 4, label %11
+    i32 5, label %13
+    i32 6, label %15
+    i32 7, label %17
+    i32 8, label %19
+    i32 9, label %21
+    i32 10, label %23
+    i32 11, label %25
+    i32 12, label %27
+    i32 13, label %29
+    i32 14, label %31
+    i32 15, label %33
+    i32 16, label %35
+    i32 17, label %37
+    i32 18, label %39
+    i32 19, label %41
+    i32 20, label %43
+    i32 21, label %45
+    i32 22, label %47
+    i32 23, label %49
+    i32 24, label %51
+    i32 25, label %53
+    i32 26, label %55
+    i32 27, label %57
+    i32 28, label %59
+    i32 29, label %61
+    i32 30, label %63
+    i32 31, label %65
+    i32 32, label %67
+  ]
+
+; <label>:7:                                      ; preds = %2
+  %8 = shl nsw i32 %6, 1
+  br label %70
+
+; <label>:9:                                      ; preds = %2
+  %10 = mul nsw i32 %6, 3
+  br label %70
+
+; <label>:11:                                     ; preds = %2
+  %12 = shl nsw i32 %6, 2
+  br label %70
+
+; <label>:13:                                     ; preds = %2
+  %14 = mul nsw i32 %6, 5
+  br label %70
+
+; <label>:15:                                     ; preds = %2
+  %16 = mul nsw i32 %6, 6
+  br label %70
+
+; <label>:17:                                     ; preds = %2
+  %18 = mul nsw i32 %6, 7
+  br label %70
+
+; <label>:19:                                     ; preds = %2
+  %20 = shl nsw i32 %6, 3
+  br label %70
+
+; <label>:21:                                     ; preds = %2
+  %22 = mul nsw i32 %6, 9
+  br label %70
+
+; <label>:23:                                     ; preds = %2
+  %24 = mul nsw i32 %6, 10
+  br label %70
+
+; <label>:25:                                     ; preds = %2
+  %26 = mul nsw i32 %6, 11
+  br label %70
+
+; <label>:27:                                     ; preds = %2
+  %28 = mul nsw i32 %6, 12
+  br label %70
+
+; <label>:29:                                     ; preds = %2
+  %30 = mul nsw i32 %6, 13
+  br label %70
+
+; <label>:31:                                     ; preds = %2
+  %32 = mul nsw i32 %6, 14
+  br label %70
+
+; <label>:33:                                     ; preds = %2
+  %34 = mul nsw i32 %6, 15
+  br label %70
+
+; <label>:35:                                     ; preds = %2
+  %36 = shl nsw i32 %6, 4
+  br label %70
+
+; <label>:37:                                     ; preds = %2
+  %38 = mul nsw i32 %6, 17
+  br label %70
+
+; <label>:39:                                     ; preds = %2
+  %40 = mul nsw i32 %6, 18
+  br label %70
+
+; <label>:41:                                     ; preds = %2
+  %42 = mul nsw i32 %6, 19
+  br label %70
+
+; <label>:43:                                     ; preds = %2
+  %44 = mul nsw i32 %6, 20
+  br label %70
+
+; <label>:45:                                     ; preds = %2
+  %46 = mul nsw i32 %6, 21
+  br label %70
+
+; <label>:47:                                     ; preds = %2
+  %48 = mul nsw i32 %6, 22
+  br label %70
+
+; <label>:49:                                     ; preds = %2
+  %50 = mul nsw i32 %6, 23
+  br label %70
+
+; <label>:51:                                     ; preds = %2
+  %52 = mul nsw i32 %6, 24
+  br label %70
+
+; <label>:53:                                     ; preds = %2
+  %54 = mul nsw i32 %6, 25
+  br label %70
+
+; <label>:55:                                     ; preds = %2
+  %56 = mul nsw i32 %6, 26
+  br label %70
+
+; <label>:57:                                     ; preds = %2
+  %58 = mul nsw i32 %6, 27
+  br label %70
+
+; <label>:59:                                     ; preds = %2
+  %60 = mul nsw i32 %6, 28
+  br label %70
+
+; <label>:61:                                     ; preds = %2
+  %62 = mul nsw i32 %6, 29
+  br label %70
+
+; <label>:63:                                     ; preds = %2
+  %64 = mul nsw i32 %6, 30
+  br label %70
+
+; <label>:65:                                     ; preds = %2
+  %66 = mul nsw i32 %6, 31
+  br label %70
+
+; <label>:67:                                     ; preds = %2
+  %68 = shl nsw i32 %6, 5
+  br label %70
+
+; <label>:69:                                     ; preds = %2
+  br label %70
+
+; <label>:70:                                     ; preds = %2, %69, %67, %65, %63, %61, %59, %57, %55, %53, %51, %49, %47, %45, %43, %41, %39, %37, %35, %33, %31, %29, %27, %25, %23, %21, %19, %17, %15, %13, %11, %9, %7
+  %71 = phi i32 [ %8, %7 ], [ %10, %9 ], [ %12, %11 ], [ %14, %13 ], [ %16, %15 ], [ %18, %17 ], [ %20, %19 ], [ %22, %21 ], [ %24, %23 ], [ %26, %25 ], [ %28, %27 ], [ %30, %29 ], [ %32, %31 ], [ %34, %33 ], [ %36, %35 ], [ %38, %37 ], [ %40, %39 ], [ %42, %41 ], [ %44, %43 ], [ %46, %45 ], [ %48, %47 ], [ %50, %49 ], [ %52, %51 ], [ %54, %53 ], [ %56, %55 ], [ %58, %57 ], [ %60, %59 ], [ %62, %61 ], [ %64, %63 ], [ %66, %65 ], [ %68, %67 ], [ 0, %69 ], [ %6, %2 ]
+  ret i32 %71
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @foo() local_unnamed_addr #0 {
+; X86-LABEL: foo:
+; X86:       # BB#0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:  .Lcfi2:
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    pushl %edi
+; X86-NEXT:  .Lcfi3:
+; X86-NEXT:    .cfi_def_cfa_offset 12
+; X86-NEXT:    pushl %esi
+; X86-NEXT:  .Lcfi4:
+; X86-NEXT:    .cfi_def_cfa_offset 16
+; X86-NEXT:  .Lcfi5:
+; X86-NEXT:    .cfi_offset %esi, -16
+; X86-NEXT:  .Lcfi6:
+; X86-NEXT:    .cfi_offset %edi, -12
+; X86-NEXT:  .Lcfi7:
+; X86-NEXT:    .cfi_offset %ebx, -8
+; X86-NEXT:    pushl $0
+; X86-NEXT:  .Lcfi8:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $1
+; X86-NEXT:  .Lcfi9:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi10:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl $1, %esi
+; X86-NEXT:    pushl $1
+; X86-NEXT:  .Lcfi11:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $2
+; X86-NEXT:  .Lcfi12:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi13:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $2, %edi
+; X86-NEXT:    pushl $1
+; X86-NEXT:  .Lcfi14:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $3
+; X86-NEXT:  .Lcfi15:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi16:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $3, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $2
+; X86-NEXT:  .Lcfi17:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $4
+; X86-NEXT:  .Lcfi18:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi19:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $4, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $2
+; X86-NEXT:  .Lcfi20:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $5
+; X86-NEXT:  .Lcfi21:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi22:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $5, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $3
+; X86-NEXT:  .Lcfi23:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $6
+; X86-NEXT:  .Lcfi24:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi25:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $6, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $3
+; X86-NEXT:  .Lcfi26:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $7
+; X86-NEXT:  .Lcfi27:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi28:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $7, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $4
+; X86-NEXT:  .Lcfi29:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $8
+; X86-NEXT:  .Lcfi30:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi31:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $8, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $4
+; X86-NEXT:  .Lcfi32:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $9
+; X86-NEXT:  .Lcfi33:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi34:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $9, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $5
+; X86-NEXT:  .Lcfi35:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $10
+; X86-NEXT:  .Lcfi36:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi37:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $10, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $5
+; X86-NEXT:  .Lcfi38:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $11
+; X86-NEXT:  .Lcfi39:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi40:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $11, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $6
+; X86-NEXT:  .Lcfi41:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $12
+; X86-NEXT:  .Lcfi42:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi43:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $12, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $6
+; X86-NEXT:  .Lcfi44:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $13
+; X86-NEXT:  .Lcfi45:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi46:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $13, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $7
+; X86-NEXT:  .Lcfi47:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $14
+; X86-NEXT:  .Lcfi48:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi49:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $14, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $7
+; X86-NEXT:  .Lcfi50:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $15
+; X86-NEXT:  .Lcfi51:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi52:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $15, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $8
+; X86-NEXT:  .Lcfi53:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $16
+; X86-NEXT:  .Lcfi54:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi55:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $16, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $8
+; X86-NEXT:  .Lcfi56:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $17
+; X86-NEXT:  .Lcfi57:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi58:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $17, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $9
+; X86-NEXT:  .Lcfi59:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $18
+; X86-NEXT:  .Lcfi60:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi61:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $18, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $9
+; X86-NEXT:  .Lcfi62:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $19
+; X86-NEXT:  .Lcfi63:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi64:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $19, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $10
+; X86-NEXT:  .Lcfi65:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $20
+; X86-NEXT:  .Lcfi66:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi67:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $20, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $10
+; X86-NEXT:  .Lcfi68:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $21
+; X86-NEXT:  .Lcfi69:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi70:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $21, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $11
+; X86-NEXT:  .Lcfi71:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $22
+; X86-NEXT:  .Lcfi72:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi73:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $22, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $11
+; X86-NEXT:  .Lcfi74:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $23
+; X86-NEXT:  .Lcfi75:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi76:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $23, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $12
+; X86-NEXT:  .Lcfi77:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $24
+; X86-NEXT:  .Lcfi78:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi79:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $24, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $12
+; X86-NEXT:  .Lcfi80:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $25
+; X86-NEXT:  .Lcfi81:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi82:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $25, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $13
+; X86-NEXT:  .Lcfi83:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $26
+; X86-NEXT:  .Lcfi84:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi85:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $26, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $13
+; X86-NEXT:  .Lcfi86:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $27
+; X86-NEXT:  .Lcfi87:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi88:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $27, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $14
+; X86-NEXT:  .Lcfi89:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $28
+; X86-NEXT:  .Lcfi90:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi91:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $28, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $14
+; X86-NEXT:  .Lcfi92:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $29
+; X86-NEXT:  .Lcfi93:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi94:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $29, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    pushl $15
+; X86-NEXT:  .Lcfi95:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $30
+; X86-NEXT:  .Lcfi96:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi97:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    xorl $30, %edi
+; X86-NEXT:    orl %ebx, %edi
+; X86-NEXT:    pushl $15
+; X86-NEXT:  .Lcfi98:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $31
+; X86-NEXT:  .Lcfi99:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi100:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl $31, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    orl %esi, %ebx
+; X86-NEXT:    pushl $16
+; X86-NEXT:  .Lcfi101:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    pushl $32
+; X86-NEXT:  .Lcfi102:
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll mult
+; X86-NEXT:    addl $8, %esp
+; X86-NEXT:  .Lcfi103:
+; X86-NEXT:    .cfi_adjust_cfa_offset -8
+; X86-NEXT:    xorl $32, %eax
+; X86-NEXT:    orl %ebx, %eax
+; X86-NEXT:    movl $-1, %eax
+; X86-NEXT:    jne .LBB1_2
+; X86-NEXT:  # BB#1:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB1_2:
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X64-HSW-LABEL: foo:
+; X64-HSW:       # BB#0:
+; X64-HSW-NEXT:    pushq %rbp
+; X64-HSW-NEXT:  .Lcfi0:
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 16
+; X64-HSW-NEXT:    pushq %r15
+; X64-HSW-NEXT:  .Lcfi1:
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 24
+; X64-HSW-NEXT:    pushq %r14
+; X64-HSW-NEXT:  .Lcfi2:
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 32
+; X64-HSW-NEXT:    pushq %r12
+; X64-HSW-NEXT:  .Lcfi3:
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 40
+; X64-HSW-NEXT:    pushq %rbx
+; X64-HSW-NEXT:  .Lcfi4:
+; X64-HSW-NEXT:    .cfi_def_cfa_offset 48
+; X64-HSW-NEXT:  .Lcfi5:
+; X64-HSW-NEXT:    .cfi_offset %rbx, -48
+; X64-HSW-NEXT:  .Lcfi6:
+; X64-HSW-NEXT:    .cfi_offset %r12, -40
+; X64-HSW-NEXT:  .Lcfi7:
+; X64-HSW-NEXT:    .cfi_offset %r14, -32
+; X64-HSW-NEXT:  .Lcfi8:
+; X64-HSW-NEXT:    .cfi_offset %r15, -24
+; X64-HSW-NEXT:  .Lcfi9:
+; X64-HSW-NEXT:    .cfi_offset %rbp, -16
+; X64-HSW-NEXT:    xorl %r12d, %r12d
+; X64-HSW-NEXT:    movl $1, %edi
+; X64-HSW-NEXT:    xorl %esi, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $1, %ebx
+; X64-HSW-NEXT:    movl $2, %edi
+; X64-HSW-NEXT:    movl $1, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $2, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $3, %edi
+; X64-HSW-NEXT:    movl $1, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %r14d
+; X64-HSW-NEXT:    xorl $3, %r14d
+; X64-HSW-NEXT:    movl $4, %edi
+; X64-HSW-NEXT:    movl $2, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $4, %ebx
+; X64-HSW-NEXT:    orl %r14d, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $5, %edi
+; X64-HSW-NEXT:    movl $2, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %r14d
+; X64-HSW-NEXT:    xorl $5, %r14d
+; X64-HSW-NEXT:    movl $6, %edi
+; X64-HSW-NEXT:    movl $3, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $6, %ebp
+; X64-HSW-NEXT:    orl %r14d, %ebp
+; X64-HSW-NEXT:    movl $7, %edi
+; X64-HSW-NEXT:    movl $3, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %r14d
+; X64-HSW-NEXT:    xorl $7, %r14d
+; X64-HSW-NEXT:    orl %ebp, %r14d
+; X64-HSW-NEXT:    orl %ebx, %r14d
+; X64-HSW-NEXT:    movl $8, %edi
+; X64-HSW-NEXT:    movl $4, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $8, %ebx
+; X64-HSW-NEXT:    movl $9, %edi
+; X64-HSW-NEXT:    movl $4, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $9, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $10, %edi
+; X64-HSW-NEXT:    movl $5, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $10, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $11, %edi
+; X64-HSW-NEXT:    movl $5, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %r15d
+; X64-HSW-NEXT:    xorl $11, %r15d
+; X64-HSW-NEXT:    orl %ebx, %r15d
+; X64-HSW-NEXT:    orl %r14d, %r15d
+; X64-HSW-NEXT:    movl $12, %edi
+; X64-HSW-NEXT:    movl $6, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $12, %ebx
+; X64-HSW-NEXT:    movl $13, %edi
+; X64-HSW-NEXT:    movl $6, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $13, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $14, %edi
+; X64-HSW-NEXT:    movl $7, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $14, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $15, %edi
+; X64-HSW-NEXT:    movl $7, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $15, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $16, %edi
+; X64-HSW-NEXT:    movl $8, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %r14d
+; X64-HSW-NEXT:    xorl $16, %r14d
+; X64-HSW-NEXT:    orl %ebp, %r14d
+; X64-HSW-NEXT:    orl %r15d, %r14d
+; X64-HSW-NEXT:    movl $17, %edi
+; X64-HSW-NEXT:    movl $8, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $17, %ebp
+; X64-HSW-NEXT:    movl $18, %edi
+; X64-HSW-NEXT:    movl $9, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $18, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $19, %edi
+; X64-HSW-NEXT:    movl $9, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $19, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $20, %edi
+; X64-HSW-NEXT:    movl $10, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $20, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $21, %edi
+; X64-HSW-NEXT:    movl $10, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $21, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $22, %edi
+; X64-HSW-NEXT:    movl $11, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %r15d
+; X64-HSW-NEXT:    xorl $22, %r15d
+; X64-HSW-NEXT:    orl %ebp, %r15d
+; X64-HSW-NEXT:    orl %r14d, %r15d
+; X64-HSW-NEXT:    movl $23, %edi
+; X64-HSW-NEXT:    movl $11, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $23, %ebp
+; X64-HSW-NEXT:    movl $24, %edi
+; X64-HSW-NEXT:    movl $12, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $24, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $25, %edi
+; X64-HSW-NEXT:    movl $12, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $25, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $26, %edi
+; X64-HSW-NEXT:    movl $13, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $26, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $27, %edi
+; X64-HSW-NEXT:    movl $13, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $27, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    movl $28, %edi
+; X64-HSW-NEXT:    movl $14, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $28, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $29, %edi
+; X64-HSW-NEXT:    movl $14, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebp
+; X64-HSW-NEXT:    xorl $29, %ebp
+; X64-HSW-NEXT:    orl %ebx, %ebp
+; X64-HSW-NEXT:    orl %r15d, %ebp
+; X64-HSW-NEXT:    movl $30, %edi
+; X64-HSW-NEXT:    movl $15, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %r14d
+; X64-HSW-NEXT:    xorl $30, %r14d
+; X64-HSW-NEXT:    movl $31, %edi
+; X64-HSW-NEXT:    movl $15, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    movl %eax, %ebx
+; X64-HSW-NEXT:    xorl $31, %ebx
+; X64-HSW-NEXT:    orl %r14d, %ebx
+; X64-HSW-NEXT:    orl %ebp, %ebx
+; X64-HSW-NEXT:    movl $32, %edi
+; X64-HSW-NEXT:    movl $16, %esi
+; X64-HSW-NEXT:    callq mult
+; X64-HSW-NEXT:    xorl $32, %eax
+; X64-HSW-NEXT:    orl %ebx, %eax
+; X64-HSW-NEXT:    movl $-1, %eax
+; X64-HSW-NEXT:    cmovel %r12d, %eax
+; X64-HSW-NEXT:    popq %rbx
+; X64-HSW-NEXT:    popq %r12
+; X64-HSW-NEXT:    popq %r14
+; X64-HSW-NEXT:    popq %r15
+; X64-HSW-NEXT:    popq %rbp
+; X64-HSW-NEXT:    retq
+  %1 = tail call i32 @mult(i32 1, i32 0)
+  %2 = icmp ne i32 %1, 1
+  %3 = tail call i32 @mult(i32 2, i32 1)
+  %4 = icmp ne i32 %3, 2
+  %5 = or i1 %2, %4
+  %6 = tail call i32 @mult(i32 3, i32 1)
+  %7 = icmp ne i32 %6, 3
+  %8 = or i1 %5, %7
+  %9 = tail call i32 @mult(i32 4, i32 2)
+  %10 = icmp ne i32 %9, 4
+  %11 = or i1 %8, %10
+  %12 = tail call i32 @mult(i32 5, i32 2)
+  %13 = icmp ne i32 %12, 5
+  %14 = or i1 %11, %13
+  %15 = tail call i32 @mult(i32 6, i32 3)
+  %16 = icmp ne i32 %15, 6
+  %17 = or i1 %14, %16
+  %18 = tail call i32 @mult(i32 7, i32 3)
+  %19 = icmp ne i32 %18, 7
+  %20 = or i1 %17, %19
+  %21 = tail call i32 @mult(i32 8, i32 4)
+  %22 = icmp ne i32 %21, 8
+  %23 = or i1 %20, %22
+  %24 = tail call i32 @mult(i32 9, i32 4)
+  %25 = icmp ne i32 %24, 9
+  %26 = or i1 %23, %25
+  %27 = tail call i32 @mult(i32 10, i32 5)
+  %28 = icmp ne i32 %27, 10
+  %29 = or i1 %26, %28
+  %30 = tail call i32 @mult(i32 11, i32 5)
+  %31 = icmp ne i32 %30, 11
+  %32 = or i1 %29, %31
+  %33 = tail call i32 @mult(i32 12, i32 6)
+  %34 = icmp ne i32 %33, 12
+  %35 = or i1 %32, %34
+  %36 = tail call i32 @mult(i32 13, i32 6)
+  %37 = icmp ne i32 %36, 13
+  %38 = or i1 %35, %37
+  %39 = tail call i32 @mult(i32 14, i32 7)
+  %40 = icmp ne i32 %39, 14
+  %41 = or i1 %38, %40
+  %42 = tail call i32 @mult(i32 15, i32 7)
+  %43 = icmp ne i32 %42, 15
+  %44 = or i1 %41, %43
+  %45 = tail call i32 @mult(i32 16, i32 8)
+  %46 = icmp ne i32 %45, 16
+  %47 = or i1 %44, %46
+  %48 = tail call i32 @mult(i32 17, i32 8)
+  %49 = icmp ne i32 %48, 17
+  %50 = or i1 %47, %49
+  %51 = tail call i32 @mult(i32 18, i32 9)
+  %52 = icmp ne i32 %51, 18
+  %53 = or i1 %50, %52
+  %54 = tail call i32 @mult(i32 19, i32 9)
+  %55 = icmp ne i32 %54, 19
+  %56 = or i1 %53, %55
+  %57 = tail call i32 @mult(i32 20, i32 10)
+  %58 = icmp ne i32 %57, 20
+  %59 = or i1 %56, %58
+  %60 = tail call i32 @mult(i32 21, i32 10)
+  %61 = icmp ne i32 %60, 21
+  %62 = or i1 %59, %61
+  %63 = tail call i32 @mult(i32 22, i32 11)
+  %64 = icmp ne i32 %63, 22
+  %65 = or i1 %62, %64
+  %66 = tail call i32 @mult(i32 23, i32 11)
+  %67 = icmp ne i32 %66, 23
+  %68 = or i1 %65, %67
+  %69 = tail call i32 @mult(i32 24, i32 12)
+  %70 = icmp ne i32 %69, 24
+  %71 = or i1 %68, %70
+  %72 = tail call i32 @mult(i32 25, i32 12)
+  %73 = icmp ne i32 %72, 25
+  %74 = or i1 %71, %73
+  %75 = tail call i32 @mult(i32 26, i32 13)
+  %76 = icmp ne i32 %75, 26
+  %77 = or i1 %74, %76
+  %78 = tail call i32 @mult(i32 27, i32 13)
+  %79 = icmp ne i32 %78, 27
+  %80 = or i1 %77, %79
+  %81 = tail call i32 @mult(i32 28, i32 14)
+  %82 = icmp ne i32 %81, 28
+  %83 = or i1 %80, %82
+  %84 = tail call i32 @mult(i32 29, i32 14)
+  %85 = icmp ne i32 %84, 29
+  %86 = or i1 %83, %85
+  %87 = tail call i32 @mult(i32 30, i32 15)
+  %88 = icmp ne i32 %87, 30
+  %89 = or i1 %86, %88
+  %90 = tail call i32 @mult(i32 31, i32 15)
+  %91 = icmp ne i32 %90, 31
+  %92 = or i1 %89, %91
+  %93 = tail call i32 @mult(i32 32, i32 16)
+  %94 = icmp ne i32 %93, 32
+  %95 = or i1 %92, %94
+  %96 = sext i1 %95 to i32
+  ret i32 %96
+}
+
+attributes #0 = { norecurse nounwind readnone uwtable  }
diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll
index eaab26ef9547..3c916fd38c6c 100644
--- a/test/CodeGen/X86/nontemporal-loads.ll
+++ b/test/CodeGen/X86/nontemporal-loads.ll
@@ -168,7 +168,9 @@ define <8 x float> @test_v8f32(<8 x float>* %src) {
 ;
 ; AVX1-LABEL: test_v8f32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v8f32:
@@ -199,7 +201,9 @@ define <8 x i32> @test_v8i32(<8 x i32>* %src) {
 ;
 ; AVX1-LABEL: test_v8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v8i32:
@@ -240,7 +244,9 @@ define <4 x double> @test_v4f64(<4 x double>* %src) {
 ;
 ; AVX1-LABEL: test_v4f64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v4f64:
@@ -271,7 +277,9 @@ define <4 x i64> @test_v4i64(<4 x i64>* %src) {
 ;
 ; AVX1-LABEL: test_v4i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v4i64:
@@ -302,7 +310,9 @@ define <16 x i16> @test_v16i16(<16 x i16>* %src) {
 ;
 ; AVX1-LABEL: test_v16i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v16i16:
@@ -333,7 +343,9 @@ define <32 x i8> @test_v32i8(<32 x i8>* %src) {
 ;
 ; AVX1-LABEL: test_v32i8:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v32i8:
@@ -370,8 +382,12 @@ define <16 x float> @test_v16f32(<16 x float>* %src) {
 ;
 ; AVX1-LABEL: test_v16f32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v16f32:
@@ -407,8 +423,12 @@ define <16 x i32> @test_v16i32(<16 x i32>* %src) {
 ;
 ; AVX1-LABEL: test_v16i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v16i32:
@@ -444,8 +464,12 @@ define <8 x double> @test_v8f64(<8 x double>* %src) {
 ;
 ; AVX1-LABEL: test_v8f64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v8f64:
@@ -481,8 +505,12 @@ define <8 x i64> @test_v8i64(<8 x i64>* %src) {
 ;
 ; AVX1-LABEL: test_v8i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v8i64:
@@ -518,8 +546,12 @@ define <32 x i16> @test_v32i16(<32 x i16>* %src) {
 ;
 ; AVX1-LABEL: test_v32i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v32i16:
@@ -567,8 +599,12 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) {
 ;
 ; AVX1-LABEL: test_v64i8:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
-; AVX1-NEXT:    vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_v64i8:
@@ -601,19 +637,27 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) {
 ; Check cases where the load would be folded.
 
 define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) {
-; SSE-LABEL: test_arg_v4f32:
-; SSE:       # BB#0:
-; SSE-NEXT:    addps (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v4f32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    addps (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v4f32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa (%rdi), %xmm1
+; SSE41-NEXT:    addps %xmm1, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_v4f32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vaddps (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v4f32:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vaddps (%rdi), %xmm0, %xmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1
   %2 = fadd <4 x float> %arg, %1
@@ -621,19 +665,27 @@ define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) {
 }
 
 define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) {
-; SSE-LABEL: test_arg_v4i32:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddd (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddd (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa (%rdi), %xmm1
+; SSE41-NEXT:    paddd %xmm1, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_v4i32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpaddd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v4i32:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddd (%rdi), %xmm0, %xmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
   %2 = add <4 x i32> %arg, %1
@@ -641,19 +693,27 @@ define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) {
 }
 
 define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) {
-; SSE-LABEL: test_arg_v2f64:
-; SSE:       # BB#0:
-; SSE-NEXT:    addpd (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v2f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    addpd (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v2f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa (%rdi), %xmm1
+; SSE41-NEXT:    addpd %xmm1, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_v2f64:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vaddpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v2f64:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vaddpd (%rdi), %xmm0, %xmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1
   %2 = fadd <2 x double> %arg, %1
@@ -661,19 +721,27 @@ define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) {
 }
 
 define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) {
-; SSE-LABEL: test_arg_v2i64:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddq (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddq (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa (%rdi), %xmm1
+; SSE41-NEXT:    paddq %xmm1, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_v2i64:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpaddq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v2i64:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddq (%rdi), %xmm0, %xmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1
   %2 = add <2 x i64> %arg, %1
@@ -681,19 +749,27 @@ define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) {
 }
 
 define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) {
-; SSE-LABEL: test_arg_v8i16:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddw (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v8i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddw (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v8i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa (%rdi), %xmm1
+; SSE41-NEXT:    paddw %xmm1, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_v8i16:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpaddw (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v8i16:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddw (%rdi), %xmm0, %xmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1
   %2 = add <8 x i16> %arg, %1
@@ -701,19 +777,27 @@ define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) {
 }
 
 define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) {
-; SSE-LABEL: test_arg_v16i8:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddb (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v16i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddb (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v16i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa (%rdi), %xmm1
+; SSE41-NEXT:    paddb %xmm1, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_v16i8:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpaddb (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v16i8:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddb (%rdi), %xmm0, %xmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
   %1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1
   %2 = add <16 x i8> %arg, %1
@@ -723,20 +807,38 @@ define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) {
 ; And now YMM versions.
 
 define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) {
-; SSE-LABEL: test_arg_v8f32:
-; SSE:       # BB#0:
-; SSE-NEXT:    addps (%rdi), %xmm0
-; SSE-NEXT:    addps 16(%rdi), %xmm1
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v8f32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    addps (%rdi), %xmm0
+; SSE2-NEXT:    addps 16(%rdi), %xmm1
+; SSE2-NEXT:    retq
 ;
-; AVX-LABEL: test_arg_v8f32:
-; AVX:       # BB#0:
-; AVX-NEXT:    vaddps (%rdi), %ymm0, %ymm0
-; AVX-NEXT:    retq
+; SSE41-LABEL: test_arg_v8f32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT:    movntdqa (%rdi), %xmm3
+; SSE41-NEXT:    addps %xmm3, %xmm0
+; SSE41-NEXT:    addps %xmm2, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: test_arg_v8f32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_arg_v8f32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v8f32:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vaddps (%rdi), %ymm0, %ymm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1
   %2 = fadd <8 x float> %arg, %1
@@ -744,51 +846,90 @@ define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) {
 }
 
 define <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) {
-; SSE-LABEL: test_arg_v8i32:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddd (%rdi), %xmm0
-; SSE-NEXT:    paddd 16(%rdi), %xmm1
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v8i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddd (%rdi), %xmm0
+; SSE2-NEXT:    paddd 16(%rdi), %xmm1
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v8i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT:    movntdqa (%rdi), %xmm3
+; SSE41-NEXT:    paddd %xmm3, %xmm0
+; SSE41-NEXT:    paddd %xmm2, %xmm1
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v8i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v8i32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_arg_v8i32:
-; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX512F-LABEL: test_arg_v8i32:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: test_arg_v8i32:
+; AVX512BW:       # BB#0:
+; AVX512BW-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512BW-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_arg_v8i32:
+; AVX512VL:       # BB#0:
+; AVX512VL-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
+; AVX512VL-NEXT:    retq
   %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
   %2 = add <8 x i32> %arg, %1
   ret <8 x i32> %2
 }
 
 define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) {
-; SSE-LABEL: test_arg_v4f64:
-; SSE:       # BB#0:
-; SSE-NEXT:    addpd (%rdi), %xmm0
-; SSE-NEXT:    addpd 16(%rdi), %xmm1
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v4f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    addpd (%rdi), %xmm0
+; SSE2-NEXT:    addpd 16(%rdi), %xmm1
+; SSE2-NEXT:    retq
 ;
-; AVX-LABEL: test_arg_v4f64:
-; AVX:       # BB#0:
-; AVX-NEXT:    vaddpd (%rdi), %ymm0, %ymm0
-; AVX-NEXT:    retq
+; SSE41-LABEL: test_arg_v4f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT:    movntdqa (%rdi), %xmm3
+; SSE41-NEXT:    addpd %xmm3, %xmm0
+; SSE41-NEXT:    addpd %xmm2, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: test_arg_v4f64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_arg_v4f64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v4f64:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vaddpd (%rdi), %ymm0, %ymm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1
   %2 = fadd <4 x double> %arg, %1
@@ -796,30 +937,40 @@ define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) {
 }
 
 define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) {
-; SSE-LABEL: test_arg_v4i64:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddq (%rdi), %xmm0
-; SSE-NEXT:    paddq 16(%rdi), %xmm1
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v4i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddq (%rdi), %xmm0
+; SSE2-NEXT:    paddq 16(%rdi), %xmm1
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v4i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT:    movntdqa (%rdi), %xmm3
+; SSE41-NEXT:    paddq %xmm3, %xmm0
+; SSE41-NEXT:    paddq %xmm2, %xmm1
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v4i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v4i64:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddq (%rdi), %ymm0, %ymm0
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v4i64:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddq (%rdi), %ymm0, %ymm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1
   %2 = add <4 x i64> %arg, %1
@@ -827,30 +978,40 @@ define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) {
 }
 
 define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) {
-; SSE-LABEL: test_arg_v16i16:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddw (%rdi), %xmm0
-; SSE-NEXT:    paddw 16(%rdi), %xmm1
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v16i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddw (%rdi), %xmm0
+; SSE2-NEXT:    paddw 16(%rdi), %xmm1
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v16i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT:    movntdqa (%rdi), %xmm3
+; SSE41-NEXT:    paddw %xmm3, %xmm0
+; SSE41-NEXT:    paddw %xmm2, %xmm1
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v16i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v16i16:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v16i16:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1
   %2 = add <16 x i16> %arg, %1
@@ -858,30 +1019,40 @@ define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) {
 }
 
 define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) {
-; SSE-LABEL: test_arg_v32i8:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddb (%rdi), %xmm0
-; SSE-NEXT:    paddb 16(%rdi), %xmm1
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v32i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddb (%rdi), %xmm0
+; SSE2-NEXT:    paddb 16(%rdi), %xmm1
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v32i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT:    movntdqa (%rdi), %xmm3
+; SSE41-NEXT:    paddb %xmm3, %xmm0
+; SSE41-NEXT:    paddb %xmm2, %xmm1
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v32i8:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v32i8:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v32i8:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
   %1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1
   %2 = add <32 x i8> %arg, %1
@@ -891,23 +1062,50 @@ define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) {
 ; And now ZMM versions.
 
 define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) {
-; SSE-LABEL: test_arg_v16f32:
-; SSE:       # BB#0:
-; SSE-NEXT:    addps (%rdi), %xmm0
-; SSE-NEXT:    addps 16(%rdi), %xmm1
-; SSE-NEXT:    addps 32(%rdi), %xmm2
-; SSE-NEXT:    addps 48(%rdi), %xmm3
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v16f32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    addps (%rdi), %xmm0
+; SSE2-NEXT:    addps 16(%rdi), %xmm1
+; SSE2-NEXT:    addps 32(%rdi), %xmm2
+; SSE2-NEXT:    addps 48(%rdi), %xmm3
+; SSE2-NEXT:    retq
 ;
-; AVX-LABEL: test_arg_v16f32:
-; AVX:       # BB#0:
-; AVX-NEXT:    vaddps (%rdi), %ymm0, %ymm0
-; AVX-NEXT:    vaddps 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT:    retq
+; SSE41-LABEL: test_arg_v16f32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT:    movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT:    movntdqa (%rdi), %xmm7
+; SSE41-NEXT:    addps %xmm7, %xmm0
+; SSE41-NEXT:    addps %xmm6, %xmm1
+; SSE41-NEXT:    addps %xmm5, %xmm2
+; SSE41-NEXT:    addps %xmm4, %xmm3
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: test_arg_v16f32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm3
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vaddps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vaddps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_arg_v16f32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT:    vaddps %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vaddps %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v16f32:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vaddps (%rdi), %zmm0, %zmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1
   %2 = fadd <16 x float> %arg, %1
@@ -915,39 +1113,54 @@ define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) {
 }
 
 define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) {
-; SSE-LABEL: test_arg_v16i32:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddd (%rdi), %xmm0
-; SSE-NEXT:    paddd 16(%rdi), %xmm1
-; SSE-NEXT:    paddd 32(%rdi), %xmm2
-; SSE-NEXT:    paddd 48(%rdi), %xmm3
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v16i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddd (%rdi), %xmm0
+; SSE2-NEXT:    paddd 16(%rdi), %xmm1
+; SSE2-NEXT:    paddd 32(%rdi), %xmm2
+; SSE2-NEXT:    paddd 48(%rdi), %xmm3
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v16i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT:    movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT:    movntdqa (%rdi), %xmm7
+; SSE41-NEXT:    paddd %xmm7, %xmm0
+; SSE41-NEXT:    paddd %xmm6, %xmm1
+; SSE41-NEXT:    paddd %xmm5, %xmm2
+; SSE41-NEXT:    paddd %xmm4, %xmm3
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v16i32:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
-; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpaddd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpaddd %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpaddd %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpaddd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v16i32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
-; AVX2-NEXT:    vpaddd 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT:    vpaddd %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v16i32:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1
   %2 = add <16 x i32> %arg, %1
@@ -955,23 +1168,50 @@ define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) {
 }
 
 define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) {
-; SSE-LABEL: test_arg_v8f64:
-; SSE:       # BB#0:
-; SSE-NEXT:    addpd (%rdi), %xmm0
-; SSE-NEXT:    addpd 16(%rdi), %xmm1
-; SSE-NEXT:    addpd 32(%rdi), %xmm2
-; SSE-NEXT:    addpd 48(%rdi), %xmm3
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v8f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    addpd (%rdi), %xmm0
+; SSE2-NEXT:    addpd 16(%rdi), %xmm1
+; SSE2-NEXT:    addpd 32(%rdi), %xmm2
+; SSE2-NEXT:    addpd 48(%rdi), %xmm3
+; SSE2-NEXT:    retq
 ;
-; AVX-LABEL: test_arg_v8f64:
-; AVX:       # BB#0:
-; AVX-NEXT:    vaddpd (%rdi), %ymm0, %ymm0
-; AVX-NEXT:    vaddpd 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT:    retq
+; SSE41-LABEL: test_arg_v8f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT:    movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT:    movntdqa (%rdi), %xmm7
+; SSE41-NEXT:    addpd %xmm7, %xmm0
+; SSE41-NEXT:    addpd %xmm6, %xmm1
+; SSE41-NEXT:    addpd %xmm5, %xmm2
+; SSE41-NEXT:    addpd %xmm4, %xmm3
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: test_arg_v8f64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm3
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vaddpd %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vaddpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: test_arg_v8f64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT:    vaddpd %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vaddpd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v8f64:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vaddpd (%rdi), %zmm0, %zmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1
   %2 = fadd <8 x double> %arg, %1
@@ -979,39 +1219,54 @@ define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) {
 }
 
 define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) {
-; SSE-LABEL: test_arg_v8i64:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddq (%rdi), %xmm0
-; SSE-NEXT:    paddq 16(%rdi), %xmm1
-; SSE-NEXT:    paddq 32(%rdi), %xmm2
-; SSE-NEXT:    paddq 48(%rdi), %xmm3
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v8i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddq (%rdi), %xmm0
+; SSE2-NEXT:    paddq 16(%rdi), %xmm1
+; SSE2-NEXT:    paddq 32(%rdi), %xmm2
+; SSE2-NEXT:    paddq 48(%rdi), %xmm3
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v8i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT:    movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT:    movntdqa (%rdi), %xmm7
+; SSE41-NEXT:    paddq %xmm7, %xmm0
+; SSE41-NEXT:    paddq %xmm6, %xmm1
+; SSE41-NEXT:    paddq %xmm5, %xmm2
+; SSE41-NEXT:    paddq %xmm4, %xmm3
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v8i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
-; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpaddq %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpaddq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpaddq %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpaddq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v8i64:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddq (%rdi), %ymm0, %ymm0
-; AVX2-NEXT:    vpaddq 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT:    vpaddq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddq %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: test_arg_v8i64:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
+; AVX512-NEXT:    vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1
   %2 = add <8 x i64> %arg, %1
@@ -1019,51 +1274,70 @@ define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) {
 }
 
 define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
-; SSE-LABEL: test_arg_v32i16:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddw (%rdi), %xmm0
-; SSE-NEXT:    paddw 16(%rdi), %xmm1
-; SSE-NEXT:    paddw 32(%rdi), %xmm2
-; SSE-NEXT:    paddw 48(%rdi), %xmm3
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v32i16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddw (%rdi), %xmm0
+; SSE2-NEXT:    paddw 16(%rdi), %xmm1
+; SSE2-NEXT:    paddw 32(%rdi), %xmm2
+; SSE2-NEXT:    paddw 48(%rdi), %xmm3
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v32i16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT:    movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT:    movntdqa (%rdi), %xmm7
+; SSE41-NEXT:    paddw %xmm7, %xmm0
+; SSE41-NEXT:    paddw %xmm6, %xmm1
+; SSE41-NEXT:    paddw %xmm5, %xmm2
+; SSE41-NEXT:    paddw %xmm4, %xmm3
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v32i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
-; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpaddw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vpaddw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpaddw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpaddw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpaddw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v32i16:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
-; AVX2-NEXT:    vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: test_arg_v32i16:
 ; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT:    vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX512F-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_arg_v32i16:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vpaddw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovntdqa (%rdi), %zmm1
+; AVX512BW-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_arg_v32i16:
 ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vpaddw (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT:    vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX512VL-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX512VL-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX512VL-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
 ; AVX512VL-NEXT:    retq
   %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1
   %2 = add <32 x i16> %arg, %1
@@ -1071,51 +1345,70 @@ define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
 }
 
 define <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) {
-; SSE-LABEL: test_arg_v64i8:
-; SSE:       # BB#0:
-; SSE-NEXT:    paddb (%rdi), %xmm0
-; SSE-NEXT:    paddb 16(%rdi), %xmm1
-; SSE-NEXT:    paddb 32(%rdi), %xmm2
-; SSE-NEXT:    paddb 48(%rdi), %xmm3
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_v64i8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    paddb (%rdi), %xmm0
+; SSE2-NEXT:    paddb 16(%rdi), %xmm1
+; SSE2-NEXT:    paddb 32(%rdi), %xmm2
+; SSE2-NEXT:    paddb 48(%rdi), %xmm3
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_v64i8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT:    movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT:    movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT:    movntdqa (%rdi), %xmm7
+; SSE41-NEXT:    paddb %xmm7, %xmm0
+; SSE41-NEXT:    paddb %xmm6, %xmm1
+; SSE41-NEXT:    paddb %xmm5, %xmm2
+; SSE41-NEXT:    paddb %xmm4, %xmm3
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_arg_v64i8:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm2
-; AVX1-NEXT:    vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpaddb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vpaddb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpaddb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpaddb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_arg_v64i8:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
-; AVX2-NEXT:    vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: test_arg_v64i8:
 ; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT:    vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX512F-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_arg_v64i8:
 ; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vpaddb (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovntdqa (%rdi), %zmm1
+; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_arg_v64i8:
 ; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vpaddb (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT:    vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX512VL-NEXT:    vmovntdqa 32(%rdi), %ymm2
+; AVX512VL-NEXT:    vmovntdqa (%rdi), %ymm3
+; AVX512VL-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
 ; AVX512VL-NEXT:    retq
   %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1
   %2 = add <64 x i8> %arg, %1
diff --git a/test/CodeGen/X86/pr32659.ll b/test/CodeGen/X86/pr32659.ll
new file mode 100644
index 000000000000..aafae9c4f6c9
--- /dev/null
+++ b/test/CodeGen/X86/pr32659.ll
@@ -0,0 +1,83 @@
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+@a = external global i32, align 4
+@d = external global i32*, align 4
+@k = external global i32**, align 4
+@j = external global i32***, align 4
+@h = external global i32, align 4
+@c = external global i32, align 4
+@i = external global i32, align 4
+@b = external global i32, align 4
+@f = external global i64, align 8
+@e = external global i64, align 8
+@g = external global i32, align 4
+
+; Function Attrs: norecurse nounwind optsize readnone
+declare i32 @fn1(i32 returned) #0
+
+
+; CHECK-LABEL: fn2
+; CHECK: calll putchar
+; CHECK: addl $1,
+; CHECK: adcl $0,
+; Function Attrs: nounwind optsize
+define void @fn2() #1 {
+entry:
+  %putchar = tail call i32 @putchar(i32 48)
+  %0 = load volatile i32, i32* @h, align 4
+  %1 = load i32, i32* @c, align 4, !tbaa !2
+  %2 = load i32***, i32**** @j, align 4
+  %3 = load i32**, i32*** %2, align 4
+  %4 = load i32*, i32** %3, align 4
+  %5 = load i32, i32* %4, align 4
+  %cmp = icmp sgt i32 %1, %5
+  %conv = zext i1 %cmp to i32
+  %6 = load i32, i32* @i, align 4
+  %cmp1 = icmp sgt i32 %6, %conv
+  %conv2 = zext i1 %cmp1 to i32
+  store i32 %conv2, i32* @b, align 4
+  %cmp3 = icmp sgt i32 %0, %conv2
+  %conv4 = zext i1 %cmp3 to i32
+  %7 = load i32, i32* @a, align 4
+  %or = xor i32 %7, %conv4
+  store i32 %or, i32* @a, align 4
+  %8 = load i32*, i32** @d, align 4
+  %9 = load i32, i32* %8, align 4
+  %conv6 = sext i32 %9 to i64
+  %10 = load i64, i64* @e, align 8
+  %and = and i64 %10, %conv6
+  store i64 %and, i64* @e, align 8
+  %11 = load i32, i32* @g, align 4
+  %dec = add nsw i32 %11, -1
+  store i32 %dec, i32* @g, align 4
+  %12 = load i64, i64* @f, align 8
+  %inc = add nsw i64 %12, 1
+  store i64 %inc, i64* @f, align 8
+  ret void
+}
+
+; Function Attrs: nounwind optsize
+declare i32 @main() #1
+
+; Function Attrs: nounwind
+declare i32 @putchar(i32) #2
+
+attributes #0 = { optsize readnone }
+attributes #1 = { optsize }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"NumRegisterParameters", i32 0}
+!1 = !{!"clang version 5.0.0 (trunk 300074) (llvm/trunk 300078)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"any pointer", !4, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long long", !4, i64 0}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 7c2937936313..0e8db74fe1bd 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -314,13 +314,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
 ; GENERIC-NEXT:    jmp LBB7_6
 ; GENERIC-NEXT:  LBB7_4:
 ; GENERIC-NEXT:    movd %r9d, %xmm1
-; GENERIC-NEXT:    movd %ecx, %xmm2
+; GENERIC-NEXT:    movd %r8d, %xmm2
 ; GENERIC-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; GENERIC-NEXT:    movd %r8d, %xmm3
+; GENERIC-NEXT:    movd %ecx, %xmm3
 ; GENERIC-NEXT:    movd %edx, %xmm1
 ; GENERIC-NEXT:  LBB7_6:
 ; GENERIC-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; GENERIC-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; GENERIC-NEXT:    psubd {{.*}}(%rip), %xmm1
 ; GENERIC-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; GENERIC-NEXT:    movq %xmm0, 16(%rsi)
@@ -350,16 +350,16 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
 ; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ATOM-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
 ; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; ATOM-NEXT:    jmp LBB7_6
 ; ATOM-NEXT:  LBB7_4:
 ; ATOM-NEXT:    movd %r9d, %xmm1
-; ATOM-NEXT:    movd %ecx, %xmm2
+; ATOM-NEXT:    movd %r8d, %xmm2
 ; ATOM-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; ATOM-NEXT:    movd %r8d, %xmm3
+; ATOM-NEXT:    movd %ecx, %xmm3
 ; ATOM-NEXT:    movd %edx, %xmm1
 ; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; ATOM-NEXT:  LBB7_6:
 ; ATOM-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; ATOM-NEXT:    psubd {{.*}}(%rip), %xmm1
diff --git a/test/CodeGen/X86/selectiondag-dominator.ll b/test/CodeGen/X86/selectiondag-dominator.ll
new file mode 100644
index 000000000000..f289a16f29eb
--- /dev/null
+++ b/test/CodeGen/X86/selectiondag-dominator.ll
@@ -0,0 +1,30 @@
+; Make sure we don't crash because we have a stale dominator tree.
+; PR33266
+; REQUIRES: asserts
+; RUN: llc -o /dev/null -verify-dom-info %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = external global [8 x [8 x [4 x i8]]], align 2
+@global.1 = external global { i8, [3 x i8] }, align 4
+
+define void @patatino() local_unnamed_addr {
+bb:
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+bb2:
+  br i1 icmp ne (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)), label %bb4, label %bb3
+
+bb3:
+  br i1 icmp eq (i64 ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), i64 0), label %bb5, label %bb4
+
+bb4:
+  %tmp = phi i64 [ ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), %bb3 ], [ 7, %bb2 ]
+  ret void
+
+bb5:
+  ret void
+}
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 0b03dffe99b5..e468c69db5dd 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -53,17 +53,17 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-NEXT:    movl %esi, (%esp)
 ; X32-NEXT:    andl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    leal -4(%ebp), %esp
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %ebp
@@ -86,18 +86,18 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X64-NEXT:    shrq $32, %rsi
 ; X64-NEXT:    shrq $32, %rdi
 ; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl %r8d, %edi
 ; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl %eax, %esi
 ; X64-NEXT:    movl %esi, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %arg0 = bitcast <4 x float> %a0 to <4 x i32>
   %arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -121,15 +121,15 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT:    movaps %xmm1, {{[0-9]+}}(%esp)
 ; X32-NEXT:    notl %edx
-; X32-NEXT:    notl %ecx
 ; X32-NEXT:    notl %esi
+; X32-NEXT:    notl %ecx
 ; X32-NEXT:    notl %eax
 ; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl %eax, (%esp)
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
 ; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
 ; X32-NEXT:    andl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -138,7 +138,7 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    leal -4(%ebp), %esp
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %ebp
@@ -165,18 +165,18 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
 ; X64-NEXT:    notl %esi
 ; X64-NEXT:    notl %edx
 ; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl %r8d, %edx
 ; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    andl %edi, %esi
 ; X64-NEXT:    movl %esi, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %arg0 = bitcast <4 x float> %a0 to <4 x i32>
   %arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -1277,17 +1277,17 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-NEXT:    movl %esi, (%esp)
 ; X32-NEXT:    orl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    leal -4(%ebp), %esp
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %ebp
@@ -1310,18 +1310,18 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X64-NEXT:    shrq $32, %rsi
 ; X64-NEXT:    shrq $32, %rdi
 ; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    orl %r8d, %edi
 ; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    orl %eax, %esi
 ; X64-NEXT:    movl %esi, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %arg0 = bitcast <4 x float> %a0 to <4 x i32>
   %arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -1538,16 +1538,16 @@ define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) n
 ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_ps:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm3 = xmm3[0],xmm1[0]
 ; X64-NEXT:    movaps %xmm3, %xmm0
 ; X64-NEXT:    retq
   %res0  = insertelement <4 x float> undef, float %a3, i32 0
@@ -1677,16 +1677,16 @@ define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3)
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_ps:
 ; X64:       # BB#0:
-; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <4 x float> undef, float %a0, i32 0
   %res1  = insertelement <4 x float> %res0, float %a1, i32 1
@@ -2239,17 +2239,17 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-NEXT:    movl %esi, (%esp)
 ; X32-NEXT:    xorl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    leal -4(%ebp), %esp
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %ebp
@@ -2272,18 +2272,18 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X64-NEXT:    shrq $32, %rsi
 ; X64-NEXT:    shrq $32, %rdi
 ; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    xorl %r8d, %edi
 ; X64-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    xorl %eax, %esi
 ; X64-NEXT:    movl %esi, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %arg0 = bitcast <4 x float> %a0 to <4 x i32>
   %arg1 = bitcast <4 x float> %a1 to <4 x i32>
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index f4964b5a6f66..c74dec3e21b6 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -87,17 +87,17 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:  .LBB1_11: # %entry
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vselect:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    testl %ecx, %ecx
+; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    je .LBB1_1
 ; X64-NEXT:  # BB#2: # %entry
 ; X64-NEXT:    xorps %xmm1, %xmm1
-; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    jne .LBB1_5
 ; X64-NEXT:  .LBB1_4:
 ; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
@@ -111,7 +111,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
 ; X64-NEXT:    jmp .LBB1_11
 ; X64-NEXT:  .LBB1_1:
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT:    testl %edx, %edx
+; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    je .LBB1_4
 ; X64-NEXT:  .LBB1_5: # %entry
 ; X64-NEXT:    xorps %xmm2, %xmm2
@@ -126,7 +126,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:  .LBB1_11: # %entry
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT:    retq
 entry:
   %a1 = icmp eq <4 x i32> %q, zeroinitializer
@@ -252,12 +252,12 @@ define <2 x float> @PR31672() #0 {
 ; X32-NEXT:    movl %eax, (%esp)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    andl %ecx, %edx
-; X32-NEXT:    notl %ecx
-; X32-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    orl %edx, %ecx
-; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT:    andl %eax, %ecx
+; X32-NEXT:    notl %eax
+; X32-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    orl %ecx, %eax
+; X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    andl %ecx, %edx
@@ -277,7 +277,7 @@ define <2 x float> @PR31672() #0 {
 ; X32-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    movl %ebp, %esp
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl
@@ -297,48 +297,48 @@ define <2 x float> @PR31672() #0 {
 ; X64-NEXT:    mulps %xmm1, %xmm0
 ; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r8
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rsi
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdi
-; X64-NEXT:    movl %r9d, %esi
-; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    movl %edi, %ecx
 ; X64-NEXT:    notl %ecx
+; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %r10
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
-; X64-NEXT:    andl %eax, %ecx
-; X64-NEXT:    orl %esi, %ecx
+; X64-NEXT:    andl %edx, %ecx
+; X64-NEXT:    orl %eax, %ecx
 ; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movl %r8d, %ecx
-; X64-NEXT:    andl %r10d, %ecx
-; X64-NEXT:    movl %r10d, %esi
-; X64-NEXT:    notl %esi
-; X64-NEXT:    andl %edx, %esi
-; X64-NEXT:    orl %ecx, %esi
-; X64-NEXT:    movl %esi, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    shrq $32, %r9
+; X64-NEXT:    shrq $32, %rsi
 ; X64-NEXT:    shrq $32, %rdi
-; X64-NEXT:    andl %edi, %r9d
+; X64-NEXT:    andl %edi, %esi
 ; X64-NEXT:    notl %edi
-; X64-NEXT:    shrq $32, %rax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    orl %r9d, %eax
-; X64-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    shrq $32, %r8
-; X64-NEXT:    shrq $32, %r10
-; X64-NEXT:    andl %r10d, %r8d
-; X64-NEXT:    notl %r10d
 ; X64-NEXT:    shrq $32, %rdx
-; X64-NEXT:    andl %r10d, %edx
-; X64-NEXT:    orl %r8d, %edx
+; X64-NEXT:    andl %edi, %edx
+; X64-NEXT:    orl %esi, %edx
 ; X64-NEXT:    movl %edx, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movl %r8d, %eax
+; X64-NEXT:    andl %r9d, %eax
+; X64-NEXT:    movl %r9d, %ecx
+; X64-NEXT:    notl %ecx
+; X64-NEXT:    andl %r10d, %ecx
+; X64-NEXT:    orl %eax, %ecx
+; X64-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    shrq $32, %r8
+; X64-NEXT:    shrq $32, %r9
+; X64-NEXT:    andl %r9d, %r8d
+; X64-NEXT:    notl %r9d
+; X64-NEXT:    shrq $32, %r10
+; X64-NEXT:    andl %r9d, %r10d
+; X64-NEXT:    orl %r8d, %r10d
+; X64-NEXT:    movl %r10d, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>)
   ret <2 x float> %t0
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 20387ccd6b7a..ff5d624e6042 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2076,7 +2076,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm2
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
@@ -2087,8 +2087,8 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm1
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
@@ -2099,7 +2099,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm3
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
@@ -2110,27 +2110,27 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_epi8:
 ; X64:       # BB#0:
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movzbl %sil, %eax
 ; X64-NEXT:    movd %eax, %xmm1
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT:    movzbl %r8b, %eax
-; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    movd %eax, %xmm2
-; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
 ; X64-NEXT:    movzbl %dl, %eax
 ; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    movd %eax, %xmm2
+; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-NEXT:    movzbl %r8b, %eax
+; X64-NEXT:    movd %eax, %xmm0
+; X64-NEXT:    movzbl %r9b, %eax
 ; X64-NEXT:    movd %eax, %xmm3
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
@@ -2138,20 +2138,20 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm1
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm2
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    movzbl %r9b, %eax
+; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm3
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm2
@@ -2161,9 +2161,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <16 x i8> undef,  i8 %a15, i32 0
   %res1  = insertelement <16 x i8> %res0,  i8 %a14, i32 1
@@ -2206,11 +2206,11 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; X32-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_epi16:
@@ -2218,20 +2218,20 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
 ; X64-NEXT:    movw {{[0-9]+}}(%rsp), %r10w
 ; X64-NEXT:    movw {{[0-9]+}}(%rsp), %ax
 ; X64-NEXT:    movd %edi, %xmm0
-; X64-NEXT:    movd %r8d, %xmm1
+; X64-NEXT:    movd %esi, %xmm1
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; X64-NEXT:    movd %edx, %xmm0
-; X64-NEXT:    movd %eax, %xmm2
+; X64-NEXT:    movd %ecx, %xmm2
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64-NEXT:    movd %esi, %xmm0
+; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT:    movd %r8d, %xmm0
 ; X64-NEXT:    movd %r9d, %xmm1
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT:    movd %ecx, %xmm3
+; X64-NEXT:    movd %eax, %xmm3
 ; X64-NEXT:    movd %r10d, %xmm0
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <8 x i16> undef, i16 %a7, i32 0
   %res1  = insertelement <8 x i16> %res0, i16 %a6, i32 1
@@ -2254,18 +2254,18 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
 ; X32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_epi32:
 ; X64:       # BB#0:
 ; X64-NEXT:    movd %edi, %xmm0
-; X64-NEXT:    movd %edx, %xmm1
+; X64-NEXT:    movd %esi, %xmm1
 ; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT:    movd %esi, %xmm2
+; X64-NEXT:    movd %edx, %xmm2
 ; X64-NEXT:    movd %ecx, %xmm0
 ; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <4 x i32> undef, i32 %a3, i32 0
   %res1  = insertelement <4 x i32> %res0, i32 %a2, i32 1
@@ -2282,11 +2282,11 @@ define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
 ; X32:       # BB#0:
 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set_epi64x:
@@ -2441,10 +2441,9 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
 ; X32-LABEL: test_mm_set1_epi64x:
 ; X32:       # BB#0:
 ; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
 ; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_set1_epi64x:
@@ -2486,7 +2485,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm2
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
@@ -2497,8 +2496,8 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm1
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
@@ -2509,7 +2508,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm3
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
@@ -2520,9 +2519,9 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X32-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_epi8:
@@ -2534,46 +2533,46 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm2
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movzbl %r9b, %eax
+; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm3
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm1
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movd %eax, %xmm2
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movzbl %dl, %eax
-; X64-NEXT:    movd %eax, %xmm3
-; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movzbl %r9b, %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    movzbl %r8b, %eax
+; X64-NEXT:    movd %eax, %xmm3
+; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movzbl %cl, %eax
+; X64-NEXT:    movd %eax, %xmm0
+; X64-NEXT:    movzbl %dl, %eax
 ; X64-NEXT:    movd %eax, %xmm2
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movzbl %sil, %eax
 ; X64-NEXT:    movd %eax, %xmm4
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    movd %eax, %xmm0
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <16 x i8> undef,  i8 %a0 , i32 0
   %res1  = insertelement <16 x i8> %res0,  i8 %a1 , i32 1
@@ -2616,11 +2615,11 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
 ; X32-NEXT:    movd %eax, %xmm0
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; X32-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
 ; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; X32-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_epi16:
@@ -2628,20 +2627,20 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
 ; X64-NEXT:    movw {{[0-9]+}}(%rsp), %ax
 ; X64-NEXT:    movw {{[0-9]+}}(%rsp), %r10w
 ; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movd %ecx, %xmm1
+; X64-NEXT:    movd %r10d, %xmm1
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; X64-NEXT:    movd %r9d, %xmm0
-; X64-NEXT:    movd %esi, %xmm2
+; X64-NEXT:    movd %r8d, %xmm2
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64-NEXT:    movd %r10d, %xmm0
+; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT:    movd %ecx, %xmm0
 ; X64-NEXT:    movd %edx, %xmm1
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT:    movd %r8d, %xmm3
+; X64-NEXT:    movd %esi, %xmm3
 ; X64-NEXT:    movd %edi, %xmm0
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <8 x i16> undef, i16 %a0, i32 0
   %res1  = insertelement <8 x i16> %res0, i16 %a1, i32 1
@@ -2664,18 +2663,18 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin
 ; X32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_epi32:
 ; X64:       # BB#0:
 ; X64-NEXT:    movd %ecx, %xmm0
-; X64-NEXT:    movd %esi, %xmm1
+; X64-NEXT:    movd %edx, %xmm1
 ; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT:    movd %edx, %xmm2
+; X64-NEXT:    movd %esi, %xmm2
 ; X64-NEXT:    movd %edi, %xmm0
 ; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT:    retq
   %res0  = insertelement <4 x i32> undef, i32 %a0, i32 0
   %res1  = insertelement <4 x i32> %res0, i32 %a1, i32 1
@@ -2692,11 +2691,11 @@ define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
 ; X32:       # BB#0:
 ; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_setr_epi64x:
diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 4d895ea264c5..b5aa26f532ef 100644
--- a/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -342,9 +342,8 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    subss %xmm1, %xmm0
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1,1,3]
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT:    movapd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test14:
@@ -375,8 +374,7 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE-NEXT:    addss %xmm0, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,2,1]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[0,0]
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -417,10 +415,10 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; SSE-NEXT:    addss %xmm0, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT:    movapd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test16:
diff --git a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
index 19305d0dad62..383ab21bd404 100644
--- a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
@@ -354,8 +354,9 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
 define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
 ; X32-LABEL: test_mm_crc32_u8:
 ; X32:       # BB#0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    crc32b %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_crc32_u8:
@@ -371,8 +372,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
 define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
 ; X32-LABEL: test_mm_crc32_u16:
 ; X32:       # BB#0:
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    crc32w %cx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_crc32_u16:
diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll
index f937d484ce0d..4165aea8794f 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1651,9 +1651,26 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
 }
 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
-; TODO stack_fold_sqrtsd
+define double @stack_fold_sqrtsd(double %a0) {
+  ;CHECK-LABEL: stack_fold_sqrtsd
+  ;CHECK:       vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call double @llvm.sqrt.f64(double %a0)
+  ret double %2
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
 ; TODO stack_fold_sqrtsd_int
-; TODO stack_fold_sqrtss
+
+define float @stack_fold_sqrtss(float %a0) {
+  ;CHECK-LABEL: stack_fold_sqrtss
+  ;CHECK:       vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call float @llvm.sqrt.f32(float %a0)
+  ret float %2
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
 ; TODO stack_fold_sqrtss_int
 
 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll
index 5c6f697610a0..3ca94b7b9467 100644
--- a/test/CodeGen/X86/stack-folding-int-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -453,6 +453,21 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin
 
 ; TODO stack_fold_pextrb
 
+; We can't naively fold pextrw as it only writes to a 16-bit memory location
+; even though it can store to a 32-bit register.
+define i16 @stack_fold_pextrw(<8 x i16> %a0) {
+; CHECK-LABEL: stack_fold_pextrw
+; CHECK:       pextrw $1, {{%xmm[0-9][0-9]*}}, %[[GPR32:(e[a-z]+|r[0-9]+d)]]
+; CHECK:       movl %[[GPR32]], {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Spill
+; CHECK:       movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
+entry:
+; add forces execution domain
+  %add = add <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+  %extract = extractelement <8 x i16> %add, i32 1
+  %asm = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+  ret i16 %extract
+}
+
 define i32 @stack_fold_pextrd(<4 x i32> %a0) {
   ;CHECK-LABEL: stack_fold_pextrd
   ;CHECK:       pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
@@ -473,8 +488,6 @@ define i64 @stack_fold_pextrq(<2 x i64> %a0) {
   ret i64 %1
 }
 
-; TODO stack_fold_pextrw
-
 define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
   ;CHECK-LABEL: stack_fold_phaddd
   ;CHECK:       phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 3c99928824bc..8e253f11e93e 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -1,16 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; An integer truncation to i1 should be done with an and instruction to make
 ; sure only the LSBit survives. Test that this is the case both for a returned
 ; value and as the operand of a branch.
 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
 
 define zeroext i1 @test1(i32 %X)  nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT:    andb $1, %al
+; CHECK-NEXT:    retl
     %Y = trunc i32 %X to i1
     ret i1 %Y
 }
-; CHECK-LABEL: test1:
-; CHECK: andb $1, %al
 
 define i1 @test2(i32 %val, i32 %mask) nounwind {
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    btl %ecx, %eax
+; CHECK-NEXT:    jae .LBB1_2
+; CHECK-NEXT:  # BB#1: # %ret_true
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB1_2: # %ret_false
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retl
 entry:
     %shifted = ashr i32 %val, %mask
     %anded = and i32 %shifted, 1
@@ -21,10 +37,19 @@ ret_true:
 ret_false:
     ret i1 false
 }
-; CHECK-LABEL: test2:
-; CHECK: btl
 
 define i32 @test3(i8* %ptr) nounwind {
+; CHECK-LABEL: test3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    testb $1, (%eax)
+; CHECK-NEXT:    je .LBB2_2
+; CHECK-NEXT:  # BB#1: # %cond_true
+; CHECK-NEXT:    movl $21, %eax
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB2_2: # %cond_false
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    retl
     %val = load i8, i8* %ptr
     %tmp = trunc i8 %val to i1
     br i1 %tmp, label %cond_true, label %cond_false
@@ -33,10 +58,18 @@ cond_true:
 cond_false:
     ret i32 42
 }
-; CHECK-LABEL: test3:
-; CHECK: testb $1, (%eax)
 
 define i32 @test4(i8* %ptr) nounwind {
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    je .LBB3_2
+; CHECK-NEXT:  # BB#1: # %cond_true
+; CHECK-NEXT:    movl $21, %eax
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB3_2: # %cond_false
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    retl
     %tmp = ptrtoint i8* %ptr to i1
     br i1 %tmp, label %cond_true, label %cond_false
 cond_true:
@@ -44,10 +77,29 @@ cond_true:
 cond_false:
     ret i32 42
 }
-; CHECK-LABEL: test4:
-; CHECK: testb $1, 4(%esp)
 
 define i32 @test5(double %d) nounwind {
+; CHECK-LABEL: test5:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fnstcw (%esp)
+; CHECK-NEXT:    movzwl (%esp), %eax
+; CHECK-NEXT:    movw $3199, (%esp) # imm = 0xC7F
+; CHECK-NEXT:    fldcw (%esp)
+; CHECK-NEXT:    movw %ax, (%esp)
+; CHECK-NEXT:    fistps {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fldcw (%esp)
+; CHECK-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    je .LBB4_2
+; CHECK-NEXT:  # BB#1: # %cond_true
+; CHECK-NEXT:    movl $21, %eax
+; CHECK-NEXT:    popl %ecx
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB4_2: # %cond_false
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    popl %ecx
+; CHECK-NEXT:    retl
     %tmp = fptosi double %d to i1
     br i1 %tmp, label %cond_true, label %cond_false
 cond_true:
@@ -55,5 +107,3 @@ cond_true:
 cond_false:
     ret i32 42
 }
-; CHECK-LABEL: test5:
-; CHECK: testb $1
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
index 477150016486..6cfe41ac503d 100644
--- a/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -1320,17 +1320,17 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
 ; SSE-NEXT:    cvttss2si %xmm1, %rax
 ; SSE-NEXT:    movd %eax, %xmm1
 ; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT:    cvttss2si %xmm2, %rax
 ; SSE-NEXT:    movd %eax, %xmm2
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE-NEXT:    cvttss2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; SSE-NEXT:    cvttss2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1560,33 +1560,33 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
 ; SSE-NEXT:    cvttss2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movaps %xmm2, %xmm3
-; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    cvttss2si %xmm3, %rax
 ; SSE-NEXT:    movd %eax, %xmm3
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
 ; SSE-NEXT:    cvttss2si %xmm2, %rax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; SSE-NEXT:    cvttss2si %xmm2, %rax
 ; SSE-NEXT:    movd %eax, %xmm2
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; SSE-NEXT:    movaps %xmm1, %xmm2
 ; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
 ; SSE-NEXT:    cvttss2si %xmm2, %rax
 ; SSE-NEXT:    movd %eax, %xmm2
 ; SSE-NEXT:    movaps %xmm1, %xmm3
-; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT:    movhlps {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT:    cvttss2si %xmm3, %rax
 ; SSE-NEXT:    movd %eax, %xmm3
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
 ; SSE-NEXT:    cvttss2si %xmm1, %rax
 ; SSE-NEXT:    movd %eax, %xmm2
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
 ; SSE-NEXT:    cvttss2si %xmm1, %rax
 ; SSE-NEXT:    movd %eax, %xmm1
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; SSE-NEXT:    movdqa %xmm2, %xmm1
 ; SSE-NEXT:    retq
 ;
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index a42b3c96c3ae..7cb1c95cb01a 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1169,16 +1169,16 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
 define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE-LABEL: sitofp_4i64_to_4f32_undef:
 ; SSE:       # BB#0:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
 ; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -1368,21 +1368,22 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
 ; SSE-LABEL: sitofp_4i64_to_4f32:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movq %xmm1, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
+; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: sitofp_4i64_to_4f32:
@@ -1838,21 +1839,14 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE-LABEL: uitofp_4i64_to_4f32_undef:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    xorps %xmm2, %xmm2
-; SSE-NEXT:    js .LBB41_2
-; SSE-NEXT:  # BB#1:
-; SSE-NEXT:    xorps %xmm2, %xmm2
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
-; SSE-NEXT:  .LBB41_2:
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    js .LBB41_3
-; SSE-NEXT:  # BB#4:
+; SSE-NEXT:    js .LBB41_1
+; SSE-NEXT:  # BB#2:
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; SSE-NEXT:    jmp .LBB41_5
-; SSE-NEXT:  .LBB41_3:
+; SSE-NEXT:    jmp .LBB41_3
+; SSE-NEXT:  .LBB41_1:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
@@ -1860,17 +1854,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
 ; SSE-NEXT:    addss %xmm0, %xmm0
-; SSE-NEXT:  .LBB41_5:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT:  .LBB41_3:
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    js .LBB41_6
-; SSE-NEXT:  # BB#7:
+; SSE-NEXT:    js .LBB41_4
+; SSE-NEXT:  # BB#5:
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; SSE-NEXT:    jmp .LBB41_8
-; SSE-NEXT:  .LBB41_6:
+; SSE-NEXT:    jmp .LBB41_6
+; SSE-NEXT:  .LBB41_4:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
@@ -1878,9 +1871,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    addss %xmm1, %xmm1
-; SSE-NEXT:  .LBB41_8:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT:  .LBB41_6:
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    testq %rax, %rax
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    js .LBB41_8
+; SSE-NEXT:  # BB#7:
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
+; SSE-NEXT:  .LBB41_8:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
 ; SSE-NEXT:    retq
 ;
 ; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2149,32 +2149,32 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB47_1
 ; SSE-NEXT:  # BB#2:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
 ; SSE-NEXT:    jmp .LBB47_3
 ; SSE-NEXT:  .LBB47_1:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; SSE-NEXT:    addss %xmm3, %xmm3
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm2
 ; SSE-NEXT:  .LBB47_3:
-; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB47_4
 ; SSE-NEXT:  # BB#5:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
 ; SSE-NEXT:    jmp .LBB47_6
 ; SSE-NEXT:  .LBB47_4:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
-; SSE-NEXT:    addss %xmm2, %xmm2
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
+; SSE-NEXT:    addss %xmm3, %xmm3
 ; SSE-NEXT:  .LBB47_6:
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB47_7
 ; SSE-NEXT:  # BB#8:
@@ -2208,9 +2208,9 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
 ; SSE-NEXT:    addss %xmm0, %xmm0
 ; SSE-NEXT:  .LBB47_12:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: uitofp_4i64_to_4f32:
@@ -3381,22 +3381,23 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE-LABEL: sitofp_load_4i64_to_4f32:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movdqa (%rdi), %xmm1
-; SSE-NEXT:    movdqa 16(%rdi), %xmm2
-; SSE-NEXT:    movq %xmm2, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; SSE-NEXT:    movq %xmm1, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT:    movq %xmm2, %rax
-; SSE-NEXT:    xorps %xmm2, %xmm2
+; SSE-NEXT:    movdqa 16(%rdi), %xmm0
+; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: sitofp_load_4i64_to_4f32:
@@ -3546,41 +3547,42 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE-LABEL: sitofp_load_8i64_to_8f32:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movdqa (%rdi), %xmm1
-; SSE-NEXT:    movdqa 16(%rdi), %xmm2
-; SSE-NEXT:    movdqa 32(%rdi), %xmm3
-; SSE-NEXT:    movdqa 48(%rdi), %xmm4
-; SSE-NEXT:    movq %xmm2, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm5
-; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    movdqa 16(%rdi), %xmm0
+; SSE-NEXT:    movdqa 32(%rdi), %xmm2
+; SSE-NEXT:    movdqa 48(%rdi), %xmm3
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE-NEXT:    unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
+; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT:    movq %xmm2, %rax
-; SSE-NEXT:    xorps %xmm2, %xmm2
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT:    movq %xmm4, %rax
-; SSE-NEXT:    xorps %xmm2, %xmm2
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; SSE-NEXT:    movq %xmm3, %rax
+; SSE-NEXT:    xorps %xmm4, %xmm4
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
+; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT:    movq %xmm2, %rax
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE-NEXT:    movq %xmm2, %rax
 ; SSE-NEXT:    xorps %xmm2, %xmm2
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
-; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT:    movq %xmm3, %rax
-; SSE-NEXT:    xorps %xmm3, %xmm3
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; SSE-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: sitofp_load_8i64_to_8f32:
@@ -3822,73 +3824,73 @@ define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) {
 define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE-LABEL: uitofp_load_4i64_to_4f32:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movdqa (%rdi), %xmm1
-; SSE-NEXT:    movdqa 16(%rdi), %xmm3
-; SSE-NEXT:    movq %xmm3, %rax
+; SSE-NEXT:    movdqa (%rdi), %xmm2
+; SSE-NEXT:    movdqa 16(%rdi), %xmm0
+; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB76_1
 ; SSE-NEXT:  # BB#2:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    jmp .LBB76_3
 ; SSE-NEXT:  .LBB76_1:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
-; SSE-NEXT:    addss %xmm2, %xmm2
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
+; SSE-NEXT:    addss %xmm1, %xmm1
 ; SSE-NEXT:  .LBB76_3:
-; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB76_4
 ; SSE-NEXT:  # BB#5:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
 ; SSE-NEXT:    jmp .LBB76_6
 ; SSE-NEXT:  .LBB76_4:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; SSE-NEXT:    addss %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
+; SSE-NEXT:    addss %xmm3, %xmm3
 ; SSE-NEXT:  .LBB76_6:
-; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT:    movq %xmm3, %rax
+; SSE-NEXT:    movq %xmm2, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB76_7
 ; SSE-NEXT:  # BB#8:
-; SSE-NEXT:    xorps %xmm3, %xmm3
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
 ; SSE-NEXT:    jmp .LBB76_9
 ; SSE-NEXT:  .LBB76_7:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    xorps %xmm3, %xmm3
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; SSE-NEXT:    addss %xmm3, %xmm3
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE-NEXT:    addss %xmm0, %xmm0
 ; SSE-NEXT:  .LBB76_9:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE-NEXT:    movq %xmm2, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB76_10
 ; SSE-NEXT:  # BB#11:
-; SSE-NEXT:    xorps %xmm1, %xmm1
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
+; SSE-NEXT:    xorps %xmm2, %xmm2
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
 ; SSE-NEXT:    jmp .LBB76_12
 ; SSE-NEXT:  .LBB76_10:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    xorps %xmm1, %xmm1
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; SSE-NEXT:    addss %xmm1, %xmm1
+; SSE-NEXT:    xorps %xmm2, %xmm2
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm2
 ; SSE-NEXT:  .LBB76_12:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: uitofp_load_4i64_to_4f32:
@@ -4186,92 +4188,111 @@ define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) {
 define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE-LABEL: uitofp_load_8i64_to_8f32:
 ; SSE:       # BB#0:
-; SSE-NEXT:    movdqa (%rdi), %xmm1
-; SSE-NEXT:    movdqa 16(%rdi), %xmm5
+; SSE-NEXT:    movdqa (%rdi), %xmm5
+; SSE-NEXT:    movdqa 16(%rdi), %xmm0
 ; SSE-NEXT:    movdqa 32(%rdi), %xmm2
-; SSE-NEXT:    movdqa 48(%rdi), %xmm3
-; SSE-NEXT:    movq %xmm5, %rax
+; SSE-NEXT:    movdqa 48(%rdi), %xmm1
+; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB80_1
 ; SSE-NEXT:  # BB#2:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm4
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
 ; SSE-NEXT:    jmp .LBB80_3
 ; SSE-NEXT:  .LBB80_1:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm4
-; SSE-NEXT:    addss %xmm4, %xmm4
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
+; SSE-NEXT:    addss %xmm3, %xmm3
 ; SSE-NEXT:  .LBB80_3:
-; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    movq %xmm0, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB80_4
 ; SSE-NEXT:  # BB#5:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm4
 ; SSE-NEXT:    jmp .LBB80_6
 ; SSE-NEXT:  .LBB80_4:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
-; SSE-NEXT:    addss %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm4
+; SSE-NEXT:    addss %xmm4, %xmm4
 ; SSE-NEXT:  .LBB80_6:
-; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
 ; SSE-NEXT:    movq %xmm5, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB80_7
 ; SSE-NEXT:  # BB#8:
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm6
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
 ; SSE-NEXT:    jmp .LBB80_9
 ; SSE-NEXT:  .LBB80_7:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm6
-; SSE-NEXT:    addss %xmm6, %xmm6
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm0
+; SSE-NEXT:    addss %xmm0, %xmm0
 ; SSE-NEXT:  .LBB80_9:
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
+; SSE-NEXT:    movq %xmm5, %rax
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB80_10
 ; SSE-NEXT:  # BB#11:
-; SSE-NEXT:    xorps %xmm5, %xmm5
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm5
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm6
 ; SSE-NEXT:    jmp .LBB80_12
 ; SSE-NEXT:  .LBB80_10:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm6
+; SSE-NEXT:    addss %xmm6, %xmm6
+; SSE-NEXT:  .LBB80_12:
+; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    testq %rax, %rax
+; SSE-NEXT:    js .LBB80_13
+; SSE-NEXT:  # BB#14:
+; SSE-NEXT:    xorps %xmm5, %xmm5
+; SSE-NEXT:    cvtsi2ssq %rax, %xmm5
+; SSE-NEXT:    jmp .LBB80_15
+; SSE-NEXT:  .LBB80_13:
+; SSE-NEXT:    movq %rax, %rcx
+; SSE-NEXT:    shrq %rcx
+; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    orq %rcx, %rax
 ; SSE-NEXT:    xorps %xmm5, %xmm5
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm5
 ; SSE-NEXT:    addss %xmm5, %xmm5
-; SSE-NEXT:  .LBB80_12:
-; SSE-NEXT:    movq %xmm3, %rax
+; SSE-NEXT:  .LBB80_15:
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT:    movq %xmm1, %rax
 ; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    js .LBB80_13
-; SSE-NEXT:  # BB#14:
+; SSE-NEXT:    js .LBB80_16
+; SSE-NEXT:  # BB#17:
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm7
-; SSE-NEXT:    jmp .LBB80_15
-; SSE-NEXT:  .LBB80_13:
+; SSE-NEXT:    jmp .LBB80_18
+; SSE-NEXT:  .LBB80_16:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
 ; SSE-NEXT:    orq %rcx, %rax
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm7
 ; SSE-NEXT:    addss %xmm7, %xmm7
-; SSE-NEXT:  .LBB80_15:
+; SSE-NEXT:  .LBB80_18:
+; SSE-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
 ; SSE-NEXT:    movq %xmm2, %rax
 ; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    js .LBB80_16
-; SSE-NEXT:  # BB#17:
+; SSE-NEXT:    js .LBB80_19
+; SSE-NEXT:  # BB#20:
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
-; SSE-NEXT:    jmp .LBB80_18
-; SSE-NEXT:  .LBB80_16:
+; SSE-NEXT:    jmp .LBB80_21
+; SSE-NEXT:  .LBB80_19:
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shrq %rcx
 ; SSE-NEXT:    andl $1, %eax
@@ -4279,28 +4300,9 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    addss %xmm1, %xmm1
-; SSE-NEXT:  .LBB80_18:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
-; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT:    movq %xmm3, %rax
-; SSE-NEXT:    testq %rax, %rax
-; SSE-NEXT:    js .LBB80_19
-; SSE-NEXT:  # BB#20:
-; SSE-NEXT:    xorps %xmm3, %xmm3
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; SSE-NEXT:    jmp .LBB80_21
-; SSE-NEXT:  .LBB80_19:
-; SSE-NEXT:    movq %rax, %rcx
-; SSE-NEXT:    shrq %rcx
-; SSE-NEXT:    andl $1, %eax
-; SSE-NEXT:    orq %rcx, %rax
-; SSE-NEXT:    xorps %xmm3, %xmm3
-; SSE-NEXT:    cvtsi2ssq %rax, %xmm3
-; SSE-NEXT:    addss %xmm3, %xmm3
 ; SSE-NEXT:  .LBB80_21:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE-NEXT:    unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE-NEXT:    movq %xmm2, %rax
 ; SSE-NEXT:    testq %rax, %rax
@@ -4318,8 +4320,8 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
 ; SSE-NEXT:    addss %xmm2, %xmm2
 ; SSE-NEXT:  .LBB80_24:
-; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: uitofp_load_8i64_to_8f32:
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
index 6439a6dcb00b..918430efea1d 100644
--- a/test/CodeGen/X86/vec_set.ll
+++ b/test/CodeGen/X86/vec_set.ll
@@ -12,35 +12,35 @@ define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i1
 ; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
 ; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X86-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
 ; X86-NEXT:    movdqa %xmm3, (%eax)
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test:
 ; X64:       # BB#0:
-; X64-NEXT:    movd %r8d, %xmm0
+; X64-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X64-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT:    movd %edx, %xmm1
-; X64-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT:    movd %ecx, %xmm0
+; X64-NEXT:    movd %r9d, %xmm0
 ; X64-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X64-NEXT:    movd %r9d, %xmm2
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT:    movd %r8d, %xmm1
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-NEXT:    movd %edx, %xmm1
 ; X64-NEXT:    movd %esi, %xmm3
-; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
 ; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
 ; X64-NEXT:    movdqa %xmm3, (%rdi)
 ; X64-NEXT:    retq
   %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0
diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll
index 4fa9596192a6..ce0b067f5043 100644
--- a/test/CodeGen/X86/vector-compare-results.ll
+++ b/test/CodeGen/X86/vector-compare-results.ll
@@ -5345,217 +5345,213 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
 ;
 ; AVX1-LABEL: test_cmp_v64i16:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm9
-; AVX1-NEXT:    vpcmpgtw %xmm8, %xmm9, %xmm8
-; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm0, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpcmpgtw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm5, %xmm1, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpcmpgtw %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm2, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm6
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
 ; AVX1-NEXT:    vpcmpgtw %xmm6, %xmm2, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm0
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm3, %xmm6
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtw %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm3, %xmm3
-; AVX1-NEXT:    vpextrb $15, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $14, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $12, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $10, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $8, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $6, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $4, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $2, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm3, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $14, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $12, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $10, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $8, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $6, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $4, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $2, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $0, %xmm6, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $14, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $12, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $10, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $8, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $6, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $4, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $2, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, 4(%rdi)
+; AVX1-NEXT:    vpextrb $0, %xmm5, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, 4(%rdi)
-; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm1, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $15, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    vpextrb $14, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $12, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $10, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $6, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $4, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $2, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $0, %xmm4, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
+; AVX1-NEXT:    movb %al, (%rdi)
+; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $14, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $13, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $12, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $11, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $10, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $9, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $8, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $7, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $6, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $5, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $4, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $3, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $2, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
-; AVX1-NEXT:    movb %al, (%rdi)
-; AVX1-NEXT:    vpextrb $1, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    vpextrb $0, %xmm8, %eax
-; AVX1-NEXT:    andb $1, %al
+; AVX1-NEXT:    andl $1, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
 ; AVX1-NEXT:    movq %rdi, %rax
 ; AVX1-NEXT:    vzeroupper
@@ -5565,207 +5561,203 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vpcmpgtw %ymm4, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm5
 ; AVX2-NEXT:    vpcmpgtw %ymm6, %ymm2, %ymm2
-; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm2, %xmm2
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm6
 ; AVX2-NEXT:    vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX2-NEXT:    vpextrb $15, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm7
+; AVX2-NEXT:    vpextrb $14, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $12, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $10, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $8, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $6, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $4, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $2, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $0, %xmm7, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $14, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $12, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $10, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $8, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $6, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $4, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $2, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm3, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $15, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $12, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $10, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $8, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $6, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $4, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $2, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, 4(%rdi)
+; AVX2-NEXT:    vpextrb $0, %xmm6, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm2, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, 4(%rdi)
-; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $12, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $10, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $8, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $6, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $4, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $2, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $0, %xmm5, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $10, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $6, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $4, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $2, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm1, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    vpextrb $14, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $12, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $10, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $6, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $4, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $2, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    movb %al, (%rdi)
+; AVX2-NEXT:    vpextrb $0, %xmm4, %eax
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
-; AVX2-NEXT:    movb %al, (%rdi)
-; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX2-NEXT:    andb $1, %al
+; AVX2-NEXT:    andl $1, %eax
 ; AVX2-NEXT:    movb %al, (%rdi)
 ; AVX2-NEXT:    movq %rdi, %rax
 ; AVX2-NEXT:    vzeroupper
diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll
index 340dd77ec481..3e3e93a7d5b0 100644
--- a/test/CodeGen/X86/vector-rem.ll
+++ b/test/CodeGen/X86/vector-rem.ll
@@ -11,9 +11,9 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind {
 ; CHECK-NEXT:    cltd
 ; CHECK-NEXT:    idivl %ecx
 ; CHECK-NEXT:    movd %edx, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
 ; CHECK-NEXT:    movd %xmm3, %eax
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
 ; CHECK-NEXT:    movd %xmm3, %ecx
 ; CHECK-NEXT:    cltd
 ; CHECK-NEXT:    idivl %ecx
@@ -24,15 +24,15 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind {
 ; CHECK-NEXT:    cltd
 ; CHECK-NEXT:    idivl %ecx
 ; CHECK-NEXT:    movd %edx, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; CHECK-NEXT:    movd %xmm0, %ecx
 ; CHECK-NEXT:    cltd
 ; CHECK-NEXT:    idivl %ecx
 ; CHECK-NEXT:    movd %edx, %xmm0
 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; CHECK-NEXT:    movdqa %xmm2, %xmm0
 ; CHECK-NEXT:    retq
   %m = srem <4 x i32> %t, %u
@@ -49,9 +49,9 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind {
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ecx
 ; CHECK-NEXT:    movd %edx, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
 ; CHECK-NEXT:    movd %xmm3, %eax
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
 ; CHECK-NEXT:    movd %xmm3, %ecx
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ecx
@@ -62,15 +62,15 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind {
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ecx
 ; CHECK-NEXT:    movd %edx, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; CHECK-NEXT:    movd %xmm0, %ecx
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %ecx
 ; CHECK-NEXT:    movd %edx, %xmm0
 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; CHECK-NEXT:    movdqa %xmm2, %xmm0
 ; CHECK-NEXT:    retq
   %m = urem <4 x i32> %t, %u
@@ -88,9 +88,9 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
 ; CHECK-NEXT:    callq fmodf
 ; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; CHECK-NEXT:    callq fmodf
 ; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
 ; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
@@ -100,15 +100,15 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
 ; CHECK-NEXT:    callq fmodf
 ; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
 ; CHECK-NEXT:    callq fmodf
 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklps (%rsp), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
-; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT:    movapd %xmm1, %xmm0
 ; CHECK-NEXT:    addq $72, %rsp
 ; CHECK-NEXT:    retq
   %m = frem <4 x float> %t, %u
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 53e471d6f175..392c0de95f24 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -1333,19 +1333,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $62, %rcx
+; SSE2-NEXT:    shlq $61, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $61, %rcx
+; SSE2-NEXT:    shlq $62, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm2
 ; SSE2-NEXT:    shlq $63, %rax
 ; SSE2-NEXT:    sarq $63, %rax
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: load_sext_4i1_to_4i32:
@@ -1356,19 +1356,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $62, %rcx
+; SSSE3-NEXT:    shlq $61, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $61, %rcx
+; SSSE3-NEXT:    shlq $62, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm2
 ; SSSE3-NEXT:    shlq $63, %rax
 ; SSSE3-NEXT:    sarq $63, %rax
 ; SSSE3-NEXT:    movd %eax, %xmm0
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1523,14 +1523,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ; SSE2-NEXT:    shrl $3, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl %ecx
+; SSE2-NEXT:    shrl $2, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE2-NEXT:    movd %eax, %xmm2
-; SSE2-NEXT:    shrl $2, %eax
+; SSE2-NEXT:    shrl %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
 ; SSE2-NEXT:    psllq $63, %xmm0
@@ -1549,14 +1549,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ; SSSE3-NEXT:    shrl $3, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl %ecx
+; SSSE3-NEXT:    shrl $2, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSSE3-NEXT:    movd %eax, %xmm2
-; SSSE3-NEXT:    shrl $2, %eax
+; SSSE3-NEXT:    shrl %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
 ; SSSE3-NEXT:    psllq $63, %xmm0
@@ -1813,7 +1813,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; SSE2-NEXT:    shrq $7, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $60, %rcx
+; SSE2-NEXT:    shlq $57, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm2
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
@@ -1822,13 +1822,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $62, %rcx
+; SSE2-NEXT:    shlq $59, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $57, %rcx
+; SSE2-NEXT:    shlq $60, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movq %rax, %rcx
@@ -1837,15 +1837,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; SSE2-NEXT:    movd %ecx, %xmm2
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
 ; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $59, %rcx
+; SSE2-NEXT:    shlq $62, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
 ; SSE2-NEXT:    movd %ecx, %xmm3
 ; SSE2-NEXT:    shlq $63, %rax
 ; SSE2-NEXT:    sarq $63, %rax
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: load_sext_8i1_to_8i16:
@@ -1855,7 +1855,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; SSSE3-NEXT:    shrq $7, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $60, %rcx
+; SSSE3-NEXT:    shlq $57, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm2
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
@@ -1864,13 +1864,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $62, %rcx
+; SSSE3-NEXT:    shlq $59, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $57, %rcx
+; SSSE3-NEXT:    shlq $60, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movq %rax, %rcx
@@ -1879,15 +1879,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; SSSE3-NEXT:    movd %ecx, %xmm2
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
 ; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $59, %rcx
+; SSSE3-NEXT:    shlq $62, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
 ; SSSE3-NEXT:    movd %ecx, %xmm3
 ; SSSE3-NEXT:    shlq $63, %rax
 ; SSSE3-NEXT:    sarq $63, %rax
 ; SSSE3-NEXT:    movd %eax, %xmm0
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: load_sext_8i1_to_8i16:
@@ -2191,7 +2191,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
 ; SSE2:       # BB#0: # %entry
 ; SSE2-NEXT:    movzbl (%rdi), %eax
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $6, %ecx
+; SSE2-NEXT:    shrl $3, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movl %eax, %ecx
@@ -2203,30 +2203,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $4, %ecx
+; SSE2-NEXT:    shrl %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    shrl $5, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl %ecx
+; SSE2-NEXT:    shrl $4, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm2
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $3, %ecx
+; SSE2-NEXT:    shrl $6, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    shrl $7, %eax
 ; SSE2-NEXT:    movzwl %ax, %eax
 ; SSE2-NEXT:    movd %eax, %xmm3
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; SSE2-NEXT:    pslld $31, %xmm0
@@ -2240,7 +2240,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
 ; SSSE3:       # BB#0: # %entry
 ; SSSE3-NEXT:    movzbl (%rdi), %eax
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $6, %ecx
+; SSSE3-NEXT:    shrl $3, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movl %eax, %ecx
@@ -2252,30 +2252,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $4, %ecx
+; SSSE3-NEXT:    shrl %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSSE3-NEXT:    movl %eax, %ecx
 ; SSSE3-NEXT:    shrl $5, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl %ecx
+; SSSE3-NEXT:    shrl $4, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm2
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $3, %ecx
+; SSSE3-NEXT:    shrl $6, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    shrl $7, %eax
 ; SSSE3-NEXT:    movzwl %ax, %eax
 ; SSSE3-NEXT:    movd %eax, %xmm3
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
 ; SSSE3-NEXT:    pslld $31, %xmm0
@@ -2546,69 +2546,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
 ; SSE2-NEXT:    movq %rax, %rsi
 ; SSE2-NEXT:    movq %rax, %rdi
 ; SSE2-NEXT:    movq %rax, %rbp
-; SSE2-NEXT:    shlq $49, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
+; SSE2-NEXT:    shrq $15, %rbp
 ; SSE2-NEXT:    movd %ebp, %xmm0
 ; SSE2-NEXT:    movq %rax, %rbp
 ; SSE2-NEXT:    movsbq %al, %rax
-; SSE2-NEXT:    shlq $57, %r8
+; SSE2-NEXT:    shlq $49, %r8
 ; SSE2-NEXT:    sarq $63, %r8
 ; SSE2-NEXT:    movd %r8d, %xmm1
-; SSE2-NEXT:    shlq $53, %r9
+; SSE2-NEXT:    shlq $50, %r9
 ; SSE2-NEXT:    sarq $63, %r9
 ; SSE2-NEXT:    movd %r9d, %xmm2
-; SSE2-NEXT:    shlq $61, %r10
+; SSE2-NEXT:    shlq $51, %r10
 ; SSE2-NEXT:    sarq $63, %r10
 ; SSE2-NEXT:    movd %r10d, %xmm3
-; SSE2-NEXT:    shlq $51, %r11
+; SSE2-NEXT:    shlq $52, %r11
 ; SSE2-NEXT:    sarq $63, %r11
 ; SSE2-NEXT:    movd %r11d, %xmm4
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    shlq $59, %r14
+; SSE2-NEXT:    shlq $53, %r14
 ; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm5
+; SSE2-NEXT:    movd %r14d, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT:    shlq $55, %r15
+; SSE2-NEXT:    shlq $54, %r15
 ; SSE2-NEXT:    sarq $63, %r15
 ; SSE2-NEXT:    movd %r15d, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT:    shlq $63, %r12
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT:    shlq $55, %r12
 ; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSE2-NEXT:    shlq $50, %r13
+; SSE2-NEXT:    movd %r12d, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT:    shlq $60, %r13
 ; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT:    shlq $58, %rbx
+; SSE2-NEXT:    movd %r13d, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    shlq $61, %rbx
 ; SSE2-NEXT:    sarq $63, %rbx
 ; SSE2-NEXT:    movd %ebx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT:    shlq $54, %rcx
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    shlq $62, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT:    shlq $62, %rdx
+; SSE2-NEXT:    movd %ecx, %xmm5
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE2-NEXT:    shlq $63, %rdx
 ; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT:    shlq $52, %rsi
+; SSE2-NEXT:    movd %edx, %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSE2-NEXT:    shlq $58, %rsi
 ; SSE2-NEXT:    sarq $63, %rsi
-; SSE2-NEXT:    movd %esi, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT:    shlq $60, %rdi
+; SSE2-NEXT:    movd %esi, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSE2-NEXT:    shlq $59, %rdi
 ; SSE2-NEXT:    sarq $63, %rdi
 ; SSE2-NEXT:    movd %edi, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE2-NEXT:    shrq $15, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE2-NEXT:    shlq $57, %rbp
+; SSE2-NEXT:    sarq $63, %rbp
+; SSE2-NEXT:    movd %ebp, %xmm2
 ; SSE2-NEXT:    shrq $7, %rax
-; SSE2-NEXT:    movd %eax, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT:    movd %eax, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    popq %rbx
 ; SSE2-NEXT:    popq %r12
 ; SSE2-NEXT:    popq %r13
@@ -2640,69 +2640,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
 ; SSSE3-NEXT:    movq %rax, %rsi
 ; SSSE3-NEXT:    movq %rax, %rdi
 ; SSSE3-NEXT:    movq %rax, %rbp
-; SSSE3-NEXT:    shlq $49, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
+; SSSE3-NEXT:    shrq $15, %rbp
 ; SSSE3-NEXT:    movd %ebp, %xmm0
 ; SSSE3-NEXT:    movq %rax, %rbp
 ; SSSE3-NEXT:    movsbq %al, %rax
-; SSSE3-NEXT:    shlq $57, %r8
+; SSSE3-NEXT:    shlq $49, %r8
 ; SSSE3-NEXT:    sarq $63, %r8
 ; SSSE3-NEXT:    movd %r8d, %xmm1
-; SSSE3-NEXT:    shlq $53, %r9
+; SSSE3-NEXT:    shlq $50, %r9
 ; SSSE3-NEXT:    sarq $63, %r9
 ; SSSE3-NEXT:    movd %r9d, %xmm2
-; SSSE3-NEXT:    shlq $61, %r10
+; SSSE3-NEXT:    shlq $51, %r10
 ; SSSE3-NEXT:    sarq $63, %r10
 ; SSSE3-NEXT:    movd %r10d, %xmm3
-; SSSE3-NEXT:    shlq $51, %r11
+; SSSE3-NEXT:    shlq $52, %r11
 ; SSSE3-NEXT:    sarq $63, %r11
 ; SSSE3-NEXT:    movd %r11d, %xmm4
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    shlq $59, %r14
+; SSSE3-NEXT:    shlq $53, %r14
 ; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm5
+; SSSE3-NEXT:    movd %r14d, %xmm0
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT:    shlq $55, %r15
+; SSSE3-NEXT:    shlq $54, %r15
 ; SSSE3-NEXT:    sarq $63, %r15
 ; SSSE3-NEXT:    movd %r15d, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSSE3-NEXT:    shlq $63, %r12
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT:    shlq $55, %r12
 ; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSSE3-NEXT:    shlq $50, %r13
+; SSSE3-NEXT:    movd %r12d, %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT:    shlq $60, %r13
 ; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT:    shlq $58, %rbx
+; SSSE3-NEXT:    movd %r13d, %xmm4
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT:    shlq $61, %rbx
 ; SSSE3-NEXT:    sarq $63, %rbx
 ; SSSE3-NEXT:    movd %ebx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT:    shlq $54, %rcx
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    shlq $62, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT:    shlq $62, %rdx
+; SSSE3-NEXT:    movd %ecx, %xmm5
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSSE3-NEXT:    shlq $63, %rdx
 ; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT:    shlq $52, %rsi
+; SSSE3-NEXT:    movd %edx, %xmm0
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSSE3-NEXT:    shlq $58, %rsi
 ; SSSE3-NEXT:    sarq $63, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT:    shlq $60, %rdi
+; SSSE3-NEXT:    movd %esi, %xmm3
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSSE3-NEXT:    shlq $59, %rdi
 ; SSSE3-NEXT:    sarq $63, %rdi
 ; SSSE3-NEXT:    movd %edi, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSSE3-NEXT:    shrq $15, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm1
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT:    shlq $57, %rbp
+; SSSE3-NEXT:    sarq $63, %rbp
+; SSSE3-NEXT:    movd %ebp, %xmm2
 ; SSSE3-NEXT:    shrq $7, %rax
-; SSSE3-NEXT:    movd %eax, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT:    movd %eax, %xmm3
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    popq %rbx
 ; SSSE3-NEXT:    popq %r12
 ; SSSE3-NEXT:    popq %r13
@@ -3002,7 +3002,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; SSE2:       # BB#0: # %entry
 ; SSE2-NEXT:    movzwl (%rdi), %eax
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $14, %ecx
+; SSE2-NEXT:    shrl $7, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movl %eax, %ecx
@@ -3011,40 +3011,40 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $10, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $2, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $12, %ecx
+; SSE2-NEXT:    shrl $5, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    shrl $4, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
+; SSE2-NEXT:    movd %ecx, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $3, %ecx
+; SSE2-NEXT:    andl $1, %ecx
+; SSE2-NEXT:    movd %ecx, %xmm0
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $2, %ecx
+; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm1
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $8, %ecx
+; SSE2-NEXT:    shrl %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $13, %ecx
+; SSE2-NEXT:    shrl $11, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $5, %ecx
+; SSE2-NEXT:    shrl $10, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm2
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -3053,31 +3053,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm3
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl %ecx
+; SSE2-NEXT:    shrl $8, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $11, %ecx
+; SSE2-NEXT:    shrl $13, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm2
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $3, %ecx
+; SSE2-NEXT:    shrl $12, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
 ; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $7, %ecx
+; SSE2-NEXT:    shrl $14, %ecx
 ; SSE2-NEXT:    andl $1, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm2
 ; SSE2-NEXT:    shrl $15, %eax
 ; SSE2-NEXT:    movzwl %ax, %eax
 ; SSE2-NEXT:    movd %eax, %xmm4
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    psllw $15, %xmm0
@@ -3091,7 +3091,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; SSSE3:       # BB#0: # %entry
 ; SSSE3-NEXT:    movzwl (%rdi), %eax
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $14, %ecx
+; SSSE3-NEXT:    shrl $7, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movl %eax, %ecx
@@ -3100,40 +3100,40 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $10, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $2, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $12, %ecx
+; SSSE3-NEXT:    shrl $5, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movl %eax, %ecx
 ; SSSE3-NEXT:    shrl $4, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
+; SSSE3-NEXT:    movd %ecx, %xmm2
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT:    movl %eax, %ecx
+; SSSE3-NEXT:    shrl $3, %ecx
+; SSSE3-NEXT:    andl $1, %ecx
+; SSSE3-NEXT:    movd %ecx, %xmm0
+; SSSE3-NEXT:    movl %eax, %ecx
+; SSSE3-NEXT:    shrl $2, %ecx
+; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm3
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; SSSE3-NEXT:    movl %eax, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm1
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $8, %ecx
+; SSSE3-NEXT:    shrl %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $13, %ecx
+; SSSE3-NEXT:    shrl $11, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $5, %ecx
+; SSSE3-NEXT:    shrl $10, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm2
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -3142,31 +3142,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm3
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl %ecx
+; SSSE3-NEXT:    shrl $8, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm0
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $11, %ecx
+; SSSE3-NEXT:    shrl $13, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm2
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $3, %ecx
+; SSSE3-NEXT:    shrl $12, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm3
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
 ; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $7, %ecx
+; SSSE3-NEXT:    shrl $14, %ecx
 ; SSSE3-NEXT:    andl $1, %ecx
 ; SSSE3-NEXT:    movd %ecx, %xmm2
 ; SSSE3-NEXT:    shrl $15, %eax
 ; SSSE3-NEXT:    movzwl %ax, %eax
 ; SSSE3-NEXT:    movd %eax, %xmm4
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; SSSE3-NEXT:    movdqa %xmm1, %xmm0
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSSE3-NEXT:    psllw $15, %xmm0
@@ -3556,162 +3556,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
 ; SSE2-NEXT:    pushq %r13
 ; SSE2-NEXT:    pushq %r12
 ; SSE2-NEXT:    pushq %rbx
-; SSE2-NEXT:    movswq (%rdi), %rbx
-; SSE2-NEXT:    movq %rbx, %r10
-; SSE2-NEXT:    movq %rbx, %r8
-; SSE2-NEXT:    movq %rbx, %r9
-; SSE2-NEXT:    movq %rbx, %r11
-; SSE2-NEXT:    movq %rbx, %r14
-; SSE2-NEXT:    movq %rbx, %r15
-; SSE2-NEXT:    movq %rbx, %r12
-; SSE2-NEXT:    movq %rbx, %r13
-; SSE2-NEXT:    movq %rbx, %rdx
-; SSE2-NEXT:    movq %rbx, %rsi
-; SSE2-NEXT:    movq %rbx, %rcx
-; SSE2-NEXT:    movq %rbx, %rbp
-; SSE2-NEXT:    movq %rbx, %rax
-; SSE2-NEXT:    shlq $49, %rax
-; SSE2-NEXT:    sarq $63, %rax
-; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    movq %rbx, %rax
-; SSE2-NEXT:    shlq $57, %r10
+; SSE2-NEXT:    movswq (%rdi), %rax
+; SSE2-NEXT:    movq %rax, %r10
+; SSE2-NEXT:    movq %rax, %r8
+; SSE2-NEXT:    movq %rax, %r9
+; SSE2-NEXT:    movq %rax, %r11
+; SSE2-NEXT:    movq %rax, %r14
+; SSE2-NEXT:    movq %rax, %r15
+; SSE2-NEXT:    movq %rax, %r12
+; SSE2-NEXT:    movq %rax, %r13
+; SSE2-NEXT:    movq %rax, %rdx
+; SSE2-NEXT:    movq %rax, %rsi
+; SSE2-NEXT:    movq %rax, %rcx
+; SSE2-NEXT:    movq %rax, %rbp
+; SSE2-NEXT:    movq %rax, %rbx
+; SSE2-NEXT:    shrq $15, %rbx
+; SSE2-NEXT:    movd %ebx, %xmm0
+; SSE2-NEXT:    movq %rax, %rbx
+; SSE2-NEXT:    shlq $49, %r10
 ; SSE2-NEXT:    sarq $63, %r10
 ; SSE2-NEXT:    movd %r10d, %xmm15
-; SSE2-NEXT:    movq %rbx, %r10
-; SSE2-NEXT:    movsbq %bl, %rbx
+; SSE2-NEXT:    movq %rax, %r10
+; SSE2-NEXT:    movsbq %al, %rax
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSE2-NEXT:    shlq $53, %r8
+; SSE2-NEXT:    shlq $50, %r8
 ; SSE2-NEXT:    sarq $63, %r8
 ; SSE2-NEXT:    movd %r8d, %xmm8
-; SSE2-NEXT:    shlq $61, %r9
-; SSE2-NEXT:    sarq $63, %r9
-; SSE2-NEXT:    movd %r9d, %xmm2
-; SSE2-NEXT:    shlq $51, %r11
-; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm9
-; SSE2-NEXT:    shlq $59, %r14
-; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm5
-; SSE2-NEXT:    shlq $55, %r15
-; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm10
-; SSE2-NEXT:    shlq $63, %r12
-; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm0
-; SSE2-NEXT:    shlq $50, %r13
-; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm11
-; SSE2-NEXT:    shlq $58, %rdx
-; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm4
-; SSE2-NEXT:    shlq $54, %rsi
-; SSE2-NEXT:    sarq $63, %rsi
-; SSE2-NEXT:    movd %esi, %xmm12
-; SSE2-NEXT:    shlq $62, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm6
-; SSE2-NEXT:    shlq $52, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm13
-; SSE2-NEXT:    shlq $60, %rax
-; SSE2-NEXT:    sarq $63, %rax
-; SSE2-NEXT:    movd %eax, %xmm7
-; SSE2-NEXT:    shrq $15, %r10
-; SSE2-NEXT:    movd %r10d, %xmm14
-; SSE2-NEXT:    shrq $7, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm3
-; SSE2-NEXT:    movswq 2(%rdi), %rdx
-; SSE2-NEXT:    movq %rdx, %r8
-; SSE2-NEXT:    movq %rdx, %r9
-; SSE2-NEXT:    movq %rdx, %r10
-; SSE2-NEXT:    movq %rdx, %r11
-; SSE2-NEXT:    movq %rdx, %r14
-; SSE2-NEXT:    movq %rdx, %r15
-; SSE2-NEXT:    movq %rdx, %r12
-; SSE2-NEXT:    movq %rdx, %r13
-; SSE2-NEXT:    movq %rdx, %rbx
-; SSE2-NEXT:    movq %rdx, %rax
-; SSE2-NEXT:    movq %rdx, %rcx
-; SSE2-NEXT:    movq %rdx, %rsi
-; SSE2-NEXT:    movq %rdx, %rdi
-; SSE2-NEXT:    movq %rdx, %rbp
-; SSE2-NEXT:    shlq $49, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm1
-; SSE2-NEXT:    movq %rdx, %rbp
-; SSE2-NEXT:    movsbq %dl, %rdx
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
-; SSE2-NEXT:    shlq $57, %r8
-; SSE2-NEXT:    sarq $63, %r8
-; SSE2-NEXT:    movd %r8d, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
-; SSE2-NEXT:    shlq $53, %r9
+; SSE2-NEXT:    shlq $51, %r9
 ; SSE2-NEXT:    sarq $63, %r9
 ; SSE2-NEXT:    movd %r9d, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT:    shlq $61, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
-; SSE2-NEXT:    shlq $51, %r11
+; SSE2-NEXT:    shlq $52, %r11
 ; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT:    shlq $59, %r14
+; SSE2-NEXT:    movd %r11d, %xmm9
+; SSE2-NEXT:    shlq $53, %r14
 ; SSE2-NEXT:    sarq $63, %r14
 ; SSE2-NEXT:    movd %r14d, %xmm6
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT:    shlq $55, %r15
+; SSE2-NEXT:    shlq $54, %r15
 ; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT:    shlq $63, %r12
+; SSE2-NEXT:    movd %r15d, %xmm10
+; SSE2-NEXT:    shlq $55, %r12
 ; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; SSE2-NEXT:    shlq $50, %r13
+; SSE2-NEXT:    movd %r12d, %xmm2
+; SSE2-NEXT:    shlq $60, %r13
+; SSE2-NEXT:    sarq $63, %r13
+; SSE2-NEXT:    movd %r13d, %xmm11
+; SSE2-NEXT:    shlq $61, %rdx
+; SSE2-NEXT:    sarq $63, %rdx
+; SSE2-NEXT:    movd %edx, %xmm5
+; SSE2-NEXT:    shlq $62, %rsi
+; SSE2-NEXT:    sarq $63, %rsi
+; SSE2-NEXT:    movd %esi, %xmm12
+; SSE2-NEXT:    shlq $63, %rcx
+; SSE2-NEXT:    sarq $63, %rcx
+; SSE2-NEXT:    movd %ecx, %xmm0
+; SSE2-NEXT:    shlq $58, %rbp
+; SSE2-NEXT:    sarq $63, %rbp
+; SSE2-NEXT:    movd %ebp, %xmm13
+; SSE2-NEXT:    shlq $59, %rbx
+; SSE2-NEXT:    sarq $63, %rbx
+; SSE2-NEXT:    movd %ebx, %xmm7
+; SSE2-NEXT:    shlq $57, %r10
+; SSE2-NEXT:    sarq $63, %r10
+; SSE2-NEXT:    movd %r10d, %xmm4
+; SSE2-NEXT:    shrq $7, %rax
+; SSE2-NEXT:    movd %eax, %xmm14
+; SSE2-NEXT:    movswq 2(%rdi), %rsi
+; SSE2-NEXT:    movq %rsi, %r8
+; SSE2-NEXT:    movq %rsi, %r9
+; SSE2-NEXT:    movq %rsi, %r10
+; SSE2-NEXT:    movq %rsi, %r11
+; SSE2-NEXT:    movq %rsi, %r14
+; SSE2-NEXT:    movq %rsi, %r15
+; SSE2-NEXT:    movq %rsi, %r12
+; SSE2-NEXT:    movq %rsi, %r13
+; SSE2-NEXT:    movq %rsi, %rbx
+; SSE2-NEXT:    movq %rsi, %rax
+; SSE2-NEXT:    movq %rsi, %rcx
+; SSE2-NEXT:    movq %rsi, %rdx
+; SSE2-NEXT:    movq %rsi, %rdi
+; SSE2-NEXT:    movq %rsi, %rbp
+; SSE2-NEXT:    shrq $15, %rbp
+; SSE2-NEXT:    movd %ebp, %xmm1
+; SSE2-NEXT:    movq %rsi, %rbp
+; SSE2-NEXT:    movsbq %sil, %rsi
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
+; SSE2-NEXT:    shlq $49, %r8
+; SSE2-NEXT:    sarq $63, %r8
+; SSE2-NEXT:    movd %r8d, %xmm3
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; SSE2-NEXT:    shlq $50, %r9
+; SSE2-NEXT:    sarq $63, %r9
+; SSE2-NEXT:    movd %r9d, %xmm4
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE2-NEXT:    shlq $51, %r10
+; SSE2-NEXT:    sarq $63, %r10
+; SSE2-NEXT:    movd %r10d, %xmm5
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT:    shlq $52, %r11
+; SSE2-NEXT:    sarq $63, %r11
+; SSE2-NEXT:    movd %r11d, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT:    shlq $53, %r14
+; SSE2-NEXT:    sarq $63, %r14
+; SSE2-NEXT:    movd %r14d, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE2-NEXT:    shlq $54, %r15
+; SSE2-NEXT:    sarq $63, %r15
+; SSE2-NEXT:    movd %r15d, %xmm4
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
+; SSE2-NEXT:    shlq $55, %r12
+; SSE2-NEXT:    sarq $63, %r12
+; SSE2-NEXT:    movd %r12d, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    shlq $60, %r13
 ; SSE2-NEXT:    sarq $63, %r13
 ; SSE2-NEXT:    movd %r13d, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT:    shlq $58, %rbx
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE2-NEXT:    shlq $61, %rbx
 ; SSE2-NEXT:    sarq $63, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE2-NEXT:    shlq $54, %rax
+; SSE2-NEXT:    movd %ebx, %xmm4
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT:    shlq $62, %rax
 ; SSE2-NEXT:    sarq $63, %rax
-; SSE2-NEXT:    movd %eax, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; SSE2-NEXT:    shlq $62, %rcx
+; SSE2-NEXT:    movd %eax, %xmm6
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; SSE2-NEXT:    shlq $63, %rcx
 ; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT:    shlq $52, %rsi
-; SSE2-NEXT:    sarq $63, %rsi
-; SSE2-NEXT:    movd %esi, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT:    shlq $60, %rdi
+; SSE2-NEXT:    movd %ecx, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT:    shlq $58, %rdx
+; SSE2-NEXT:    sarq $63, %rdx
+; SSE2-NEXT:    movd %edx, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE2-NEXT:    shlq $59, %rdi
 ; SSE2-NEXT:    sarq $63, %rdi
-; SSE2-NEXT:    movd %edi, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT:    shrq $15, %rbp
+; SSE2-NEXT:    movd %edi, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT:    shlq $57, %rbp
+; SSE2-NEXT:    sarq $63, %rbp
 ; SSE2-NEXT:    movd %ebp, %xmm2
-; SSE2-NEXT:    shrq $7, %rdx
-; SSE2-NEXT:    movd %edx, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE2-NEXT:    shrq $7, %rsi
+; SSE2-NEXT:    movd %esi, %xmm5
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE2-NEXT:    popq %rbx
 ; SSE2-NEXT:    popq %r12
 ; SSE2-NEXT:    popq %r13
@@ -3728,162 +3728,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
 ; SSSE3-NEXT:    pushq %r13
 ; SSSE3-NEXT:    pushq %r12
 ; SSSE3-NEXT:    pushq %rbx
-; SSSE3-NEXT:    movswq (%rdi), %rbx
-; SSSE3-NEXT:    movq %rbx, %r10
-; SSSE3-NEXT:    movq %rbx, %r8
-; SSSE3-NEXT:    movq %rbx, %r9
-; SSSE3-NEXT:    movq %rbx, %r11
-; SSSE3-NEXT:    movq %rbx, %r14
-; SSSE3-NEXT:    movq %rbx, %r15
-; SSSE3-NEXT:    movq %rbx, %r12
-; SSSE3-NEXT:    movq %rbx, %r13
-; SSSE3-NEXT:    movq %rbx, %rdx
-; SSSE3-NEXT:    movq %rbx, %rsi
-; SSSE3-NEXT:    movq %rbx, %rcx
-; SSSE3-NEXT:    movq %rbx, %rbp
-; SSSE3-NEXT:    movq %rbx, %rax
-; SSSE3-NEXT:    shlq $49, %rax
-; SSSE3-NEXT:    sarq $63, %rax
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    movq %rbx, %rax
-; SSSE3-NEXT:    shlq $57, %r10
+; SSSE3-NEXT:    movswq (%rdi), %rax
+; SSSE3-NEXT:    movq %rax, %r10
+; SSSE3-NEXT:    movq %rax, %r8
+; SSSE3-NEXT:    movq %rax, %r9
+; SSSE3-NEXT:    movq %rax, %r11
+; SSSE3-NEXT:    movq %rax, %r14
+; SSSE3-NEXT:    movq %rax, %r15
+; SSSE3-NEXT:    movq %rax, %r12
+; SSSE3-NEXT:    movq %rax, %r13
+; SSSE3-NEXT:    movq %rax, %rdx
+; SSSE3-NEXT:    movq %rax, %rsi
+; SSSE3-NEXT:    movq %rax, %rcx
+; SSSE3-NEXT:    movq %rax, %rbp
+; SSSE3-NEXT:    movq %rax, %rbx
+; SSSE3-NEXT:    shrq $15, %rbx
+; SSSE3-NEXT:    movd %ebx, %xmm0
+; SSSE3-NEXT:    movq %rax, %rbx
+; SSSE3-NEXT:    shlq $49, %r10
 ; SSSE3-NEXT:    sarq $63, %r10
 ; SSSE3-NEXT:    movd %r10d, %xmm15
-; SSSE3-NEXT:    movq %rbx, %r10
-; SSSE3-NEXT:    movsbq %bl, %rbx
+; SSSE3-NEXT:    movq %rax, %r10
+; SSSE3-NEXT:    movsbq %al, %rax
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSSE3-NEXT:    shlq $53, %r8
+; SSSE3-NEXT:    shlq $50, %r8
 ; SSSE3-NEXT:    sarq $63, %r8
 ; SSSE3-NEXT:    movd %r8d, %xmm8
-; SSSE3-NEXT:    shlq $61, %r9
-; SSSE3-NEXT:    sarq $63, %r9
-; SSSE3-NEXT:    movd %r9d, %xmm2
-; SSSE3-NEXT:    shlq $51, %r11
-; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm9
-; SSSE3-NEXT:    shlq $59, %r14
-; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm5
-; SSSE3-NEXT:    shlq $55, %r15
-; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm10
-; SSSE3-NEXT:    shlq $63, %r12
-; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm0
-; SSSE3-NEXT:    shlq $50, %r13
-; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm11
-; SSSE3-NEXT:    shlq $58, %rdx
-; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm4
-; SSSE3-NEXT:    shlq $54, %rsi
-; SSSE3-NEXT:    sarq $63, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm12
-; SSSE3-NEXT:    shlq $62, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm6
-; SSSE3-NEXT:    shlq $52, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm13
-; SSSE3-NEXT:    shlq $60, %rax
-; SSSE3-NEXT:    sarq $63, %rax
-; SSSE3-NEXT:    movd %eax, %xmm7
-; SSSE3-NEXT:    shrq $15, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm14
-; SSSE3-NEXT:    shrq $7, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm3
-; SSSE3-NEXT:    movswq 2(%rdi), %rdx
-; SSSE3-NEXT:    movq %rdx, %r8
-; SSSE3-NEXT:    movq %rdx, %r9
-; SSSE3-NEXT:    movq %rdx, %r10
-; SSSE3-NEXT:    movq %rdx, %r11
-; SSSE3-NEXT:    movq %rdx, %r14
-; SSSE3-NEXT:    movq %rdx, %r15
-; SSSE3-NEXT:    movq %rdx, %r12
-; SSSE3-NEXT:    movq %rdx, %r13
-; SSSE3-NEXT:    movq %rdx, %rbx
-; SSSE3-NEXT:    movq %rdx, %rax
-; SSSE3-NEXT:    movq %rdx, %rcx
-; SSSE3-NEXT:    movq %rdx, %rsi
-; SSSE3-NEXT:    movq %rdx, %rdi
-; SSSE3-NEXT:    movq %rdx, %rbp
-; SSSE3-NEXT:    shlq $49, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm1
-; SSSE3-NEXT:    movq %rdx, %rbp
-; SSSE3-NEXT:    movsbq %dl, %rdx
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
-; SSSE3-NEXT:    shlq $57, %r8
-; SSSE3-NEXT:    sarq $63, %r8
-; SSSE3-NEXT:    movd %r8d, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
-; SSSE3-NEXT:    shlq $53, %r9
+; SSSE3-NEXT:    shlq $51, %r9
 ; SSSE3-NEXT:    sarq $63, %r9
 ; SSSE3-NEXT:    movd %r9d, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT:    shlq $61, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
-; SSSE3-NEXT:    shlq $51, %r11
+; SSSE3-NEXT:    shlq $52, %r11
 ; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm5
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT:    shlq $59, %r14
+; SSSE3-NEXT:    movd %r11d, %xmm9
+; SSSE3-NEXT:    shlq $53, %r14
 ; SSSE3-NEXT:    sarq $63, %r14
 ; SSSE3-NEXT:    movd %r14d, %xmm6
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT:    shlq $55, %r15
+; SSSE3-NEXT:    shlq $54, %r15
 ; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT:    shlq $63, %r12
+; SSSE3-NEXT:    movd %r15d, %xmm10
+; SSSE3-NEXT:    shlq $55, %r12
 ; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; SSSE3-NEXT:    shlq $50, %r13
+; SSSE3-NEXT:    movd %r12d, %xmm2
+; SSSE3-NEXT:    shlq $60, %r13
+; SSSE3-NEXT:    sarq $63, %r13
+; SSSE3-NEXT:    movd %r13d, %xmm11
+; SSSE3-NEXT:    shlq $61, %rdx
+; SSSE3-NEXT:    sarq $63, %rdx
+; SSSE3-NEXT:    movd %edx, %xmm5
+; SSSE3-NEXT:    shlq $62, %rsi
+; SSSE3-NEXT:    sarq $63, %rsi
+; SSSE3-NEXT:    movd %esi, %xmm12
+; SSSE3-NEXT:    shlq $63, %rcx
+; SSSE3-NEXT:    sarq $63, %rcx
+; SSSE3-NEXT:    movd %ecx, %xmm0
+; SSSE3-NEXT:    shlq $58, %rbp
+; SSSE3-NEXT:    sarq $63, %rbp
+; SSSE3-NEXT:    movd %ebp, %xmm13
+; SSSE3-NEXT:    shlq $59, %rbx
+; SSSE3-NEXT:    sarq $63, %rbx
+; SSSE3-NEXT:    movd %ebx, %xmm7
+; SSSE3-NEXT:    shlq $57, %r10
+; SSSE3-NEXT:    sarq $63, %r10
+; SSSE3-NEXT:    movd %r10d, %xmm4
+; SSSE3-NEXT:    shrq $7, %rax
+; SSSE3-NEXT:    movd %eax, %xmm14
+; SSSE3-NEXT:    movswq 2(%rdi), %rsi
+; SSSE3-NEXT:    movq %rsi, %r8
+; SSSE3-NEXT:    movq %rsi, %r9
+; SSSE3-NEXT:    movq %rsi, %r10
+; SSSE3-NEXT:    movq %rsi, %r11
+; SSSE3-NEXT:    movq %rsi, %r14
+; SSSE3-NEXT:    movq %rsi, %r15
+; SSSE3-NEXT:    movq %rsi, %r12
+; SSSE3-NEXT:    movq %rsi, %r13
+; SSSE3-NEXT:    movq %rsi, %rbx
+; SSSE3-NEXT:    movq %rsi, %rax
+; SSSE3-NEXT:    movq %rsi, %rcx
+; SSSE3-NEXT:    movq %rsi, %rdx
+; SSSE3-NEXT:    movq %rsi, %rdi
+; SSSE3-NEXT:    movq %rsi, %rbp
+; SSSE3-NEXT:    shrq $15, %rbp
+; SSSE3-NEXT:    movd %ebp, %xmm1
+; SSSE3-NEXT:    movq %rsi, %rbp
+; SSSE3-NEXT:    movsbq %sil, %rsi
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
+; SSSE3-NEXT:    shlq $49, %r8
+; SSSE3-NEXT:    sarq $63, %r8
+; SSSE3-NEXT:    movd %r8d, %xmm3
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; SSSE3-NEXT:    shlq $50, %r9
+; SSSE3-NEXT:    sarq $63, %r9
+; SSSE3-NEXT:    movd %r9d, %xmm4
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSSE3-NEXT:    shlq $51, %r10
+; SSSE3-NEXT:    sarq $63, %r10
+; SSSE3-NEXT:    movd %r10d, %xmm5
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSSE3-NEXT:    shlq $52, %r11
+; SSSE3-NEXT:    sarq $63, %r11
+; SSSE3-NEXT:    movd %r11d, %xmm2
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSSE3-NEXT:    shlq $53, %r14
+; SSSE3-NEXT:    sarq $63, %r14
+; SSSE3-NEXT:    movd %r14d, %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSSE3-NEXT:    shlq $54, %r15
+; SSSE3-NEXT:    sarq $63, %r15
+; SSSE3-NEXT:    movd %r15d, %xmm4
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
+; SSSE3-NEXT:    shlq $55, %r12
+; SSSE3-NEXT:    sarq $63, %r12
+; SSSE3-NEXT:    movd %r12d, %xmm3
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT:    shlq $60, %r13
 ; SSSE3-NEXT:    sarq $63, %r13
 ; SSSE3-NEXT:    movd %r13d, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT:    shlq $58, %rbx
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSSE3-NEXT:    shlq $61, %rbx
 ; SSSE3-NEXT:    sarq $63, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSSE3-NEXT:    shlq $54, %rax
+; SSSE3-NEXT:    movd %ebx, %xmm4
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT:    shlq $62, %rax
 ; SSSE3-NEXT:    sarq $63, %rax
-; SSSE3-NEXT:    movd %eax, %xmm5
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; SSSE3-NEXT:    shlq $62, %rcx
+; SSSE3-NEXT:    movd %eax, %xmm6
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; SSSE3-NEXT:    shlq $63, %rcx
 ; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT:    shlq $52, %rsi
-; SSSE3-NEXT:    sarq $63, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT:    shlq $60, %rdi
+; SSSE3-NEXT:    movd %ecx, %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT:    shlq $58, %rdx
+; SSSE3-NEXT:    sarq $63, %rdx
+; SSSE3-NEXT:    movd %edx, %xmm2
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSSE3-NEXT:    shlq $59, %rdi
 ; SSSE3-NEXT:    sarq $63, %rdi
-; SSSE3-NEXT:    movd %edi, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT:    shrq $15, %rbp
+; SSSE3-NEXT:    movd %edi, %xmm4
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT:    shlq $57, %rbp
+; SSSE3-NEXT:    sarq $63, %rbp
 ; SSSE3-NEXT:    movd %ebp, %xmm2
-; SSSE3-NEXT:    shrq $7, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm5
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSSE3-NEXT:    shrq $7, %rsi
+; SSSE3-NEXT:    movd %esi, %xmm5
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSSE3-NEXT:    popq %rbx
 ; SSSE3-NEXT:    popq %r12
 ; SSSE3-NEXT:    popq %r13
diff --git a/test/CodeGen/X86/vector-shuffle-v48.ll b/test/CodeGen/X86/vector-shuffle-v48.ll
new file mode 100644
index 000000000000..9bd75148ecd1
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-v48.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-pc-linux  -mattr=+avx2 < %s | FileCheck %s
+define <16 x i8> @foo(<48 x i8>* %x0, <16 x i32> %x1, <16 x i32> %x2) {
+; CHECK-LABEL: foo:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqu (%rdi), %ymm4
+; CHECK-NEXT:    vmovdqu 32(%rdi), %xmm5
+; CHECK-NEXT:    vpextrb $13, %xmm5, %eax
+; CHECK-NEXT:    vpextrb $10, %xmm5, %ecx
+; CHECK-NEXT:    vpextrb $7, %xmm5, %edx
+; CHECK-NEXT:    vpextrb $4, %xmm5, %esi
+; CHECK-NEXT:    vpextrb $1, %xmm5, %edi
+; CHECK-NEXT:    vextracti128 $1, %ymm4, %xmm5
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm6 = xmm5[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm7 = xmm4[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15]
+; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm6 = xmm7[0],xmm6[0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm4 = xmm4[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7]
+; CHECK-NEXT:    vinserti128 $1, %xmm6, %ymm4, %ymm4
+; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm5 = xmm5[8,11,14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT:    vpinsrb $3, %edi, %xmm5, %xmm5
+; CHECK-NEXT:    vpinsrb $4, %esi, %xmm5, %xmm5
+; CHECK-NEXT:    vpinsrb $5, %edx, %xmm5, %xmm5
+; CHECK-NEXT:    vpinsrb $6, %ecx, %xmm5, %xmm5
+; CHECK-NEXT:    vpinsrb $7, %eax, %xmm5, %xmm5
+; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero
+; CHECK-NEXT:    vpmulld %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpmulld %ymm1, %ymm5, %ymm1
+; CHECK-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrlvd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
+; CHECK-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
+; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; CHECK-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; CHECK-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
+; CHECK-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; CHECK-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
+; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %1 = load <48 x i8>, <48 x i8>* %x0, align 1
+  %2 = shufflevector <48 x i8> %1, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
+  %3 = zext <16 x i8> %2 to <16 x i32>
+  %4 = mul <16 x i32> %3, %x1
+  %5 = lshr <16 x i32> %4, %x2
+  %6 = trunc <16 x i32> %5 to <16 x i8>
+  ret <16 x i8> %6
+}
diff --git a/test/CodeGen/X86/vector-shuffle-variable-128.ll b/test/CodeGen/X86/vector-shuffle-variable-128.ll
index bde8a16d2a5a..452f387a4fee 100644
--- a/test/CodeGen/X86/vector-shuffle-variable-128.ll
+++ b/test/CodeGen/X86/vector-shuffle-variable-128.ll
@@ -83,7 +83,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
 ; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -103,7 +103,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
 ; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -168,7 +168,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
 ; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -188,7 +188,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
 ; SSSE3-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -257,27 +257,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
 ; SSE2-NEXT:    andl $7, %eax
 ; SSE2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
+; SSE2-NEXT:    movzwl -24(%rsp,%r10,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm1
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; SSE2-NEXT:    movzwl -24(%rsp,%r9,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
+; SSE2-NEXT:    movzwl -24(%rsp,%r8,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm2
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT:    movzwl -24(%rsp,%r10,2), %eax
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    movzwl -24(%rsp,%rdx,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm1
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    movzwl -24(%rsp,%r8,2), %eax
+; SSE2-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm3
 ; SSE2-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
@@ -301,27 +301,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
 ; SSSE3-NEXT:    andl $7, %eax
 ; SSSE3-NEXT:    movzwl -24(%rsp,%rax,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
+; SSSE3-NEXT:    movzwl -24(%rsp,%r10,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm1
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
 ; SSSE3-NEXT:    movzwl -24(%rsp,%r9,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
+; SSSE3-NEXT:    movzwl -24(%rsp,%r8,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm2
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT:    movzwl -24(%rsp,%r10,2), %eax
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
 ; SSSE3-NEXT:    movzwl -24(%rsp,%rdx,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm1
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    movzwl -24(%rsp,%r8,2), %eax
+; SSSE3-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm3
 ; SSSE3-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
@@ -425,22 +425,25 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
 ; SSE2-NEXT:    andl $15, %eax
 ; SSE2-NEXT:    movzbl (%rax,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm9
-; SSE2-NEXT:    andl $15, %ecx
-; SSE2-NEXT:    movzbl (%rcx,%r10), %eax
+; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    andl $15, %eax
+; SSE2-NEXT:    movzbl (%rax,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm3
 ; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT:    andl $15, %eax
 ; SSE2-NEXT:    movzbl (%rax,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm10
-; SSE2-NEXT:    andl $15, %r9d
-; SSE2-NEXT:    movzbl (%r9,%r10), %eax
+; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    andl $15, %eax
+; SSE2-NEXT:    movzbl (%rax,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm7
 ; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT:    andl $15, %eax
 ; SSE2-NEXT:    movzbl (%rax,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm11
-; SSE2-NEXT:    andl $15, %esi
-; SSE2-NEXT:    movzbl (%rsi,%r10), %eax
+; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT:    andl $15, %eax
+; SSE2-NEXT:    movzbl (%rax,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm6
 ; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; SSE2-NEXT:    andl $15, %eax
@@ -450,42 +453,39 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
 ; SSE2-NEXT:    andl $15, %eax
 ; SSE2-NEXT:    movzbl (%rax,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm5
-; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    andl $15, %eax
-; SSE2-NEXT:    movzbl (%rax,%r10), %eax
+; SSE2-NEXT:    andl $15, %r9d
+; SSE2-NEXT:    movzbl (%r9,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm13
-; SSE2-NEXT:    andl $15, %edx
-; SSE2-NEXT:    movzbl (%rdx,%r10), %eax
-; SSE2-NEXT:    movd %eax, %xmm4
-; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    andl $15, %eax
-; SSE2-NEXT:    movzbl (%rax,%r10), %eax
-; SSE2-NEXT:    movd %eax, %xmm14
 ; SSE2-NEXT:    andl $15, %r8d
 ; SSE2-NEXT:    movzbl (%r8,%r10), %eax
+; SSE2-NEXT:    movd %eax, %xmm4
+; SSE2-NEXT:    andl $15, %ecx
+; SSE2-NEXT:    movzbl (%rcx,%r10), %eax
+; SSE2-NEXT:    movd %eax, %xmm14
+; SSE2-NEXT:    andl $15, %edx
+; SSE2-NEXT:    movzbl (%rdx,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm1
-; SSE2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    andl $15, %eax
-; SSE2-NEXT:    movzbl (%rax,%r10), %eax
+; SSE2-NEXT:    andl $15, %esi
+; SSE2-NEXT:    movzbl (%rsi,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm2
 ; SSE2-NEXT:    andl $15, %edi
 ; SSE2-NEXT:    movzbl (%rdi,%r10), %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -510,22 +510,25 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
 ; SSSE3-NEXT:    andl $15, %eax
 ; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm9
-; SSSE3-NEXT:    andl $15, %ecx
-; SSSE3-NEXT:    movzbl (%rcx,%r10), %eax
+; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT:    andl $15, %eax
+; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm3
 ; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; SSSE3-NEXT:    andl $15, %eax
 ; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm10
-; SSSE3-NEXT:    andl $15, %r9d
-; SSSE3-NEXT:    movzbl (%r9,%r10), %eax
+; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT:    andl $15, %eax
+; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm7
 ; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; SSSE3-NEXT:    andl $15, %eax
 ; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm11
-; SSSE3-NEXT:    andl $15, %esi
-; SSSE3-NEXT:    movzbl (%rsi,%r10), %eax
+; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT:    andl $15, %eax
+; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm6
 ; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
 ; SSSE3-NEXT:    andl $15, %eax
@@ -535,42 +538,39 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
 ; SSSE3-NEXT:    andl $15, %eax
 ; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm5
-; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; SSSE3-NEXT:    andl $15, %eax
-; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
+; SSSE3-NEXT:    andl $15, %r9d
+; SSSE3-NEXT:    movzbl (%r9,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm13
-; SSSE3-NEXT:    andl $15, %edx
-; SSSE3-NEXT:    movzbl (%rdx,%r10), %eax
-; SSSE3-NEXT:    movd %eax, %xmm4
-; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; SSSE3-NEXT:    andl $15, %eax
-; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
-; SSSE3-NEXT:    movd %eax, %xmm14
 ; SSSE3-NEXT:    andl $15, %r8d
 ; SSSE3-NEXT:    movzbl (%r8,%r10), %eax
+; SSSE3-NEXT:    movd %eax, %xmm4
+; SSSE3-NEXT:    andl $15, %ecx
+; SSSE3-NEXT:    movzbl (%rcx,%r10), %eax
+; SSSE3-NEXT:    movd %eax, %xmm14
+; SSSE3-NEXT:    andl $15, %edx
+; SSSE3-NEXT:    movzbl (%rdx,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm1
-; SSSE3-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; SSSE3-NEXT:    andl $15, %eax
-; SSSE3-NEXT:    movzbl (%rax,%r10), %eax
+; SSSE3-NEXT:    andl $15, %esi
+; SSSE3-NEXT:    movzbl (%rsi,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm2
 ; SSSE3-NEXT:    andl $15, %edi
 ; SSSE3-NEXT:    movzbl (%rdi,%r10), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -739,7 +739,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
 ; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -759,7 +759,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
 ; SSSE3-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -824,23 +824,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSE2-NEXT:    leaq -{{[0-9]+}}(%rsp), %rcx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm8
-; SSE2-NEXT:    movzbl 7(%rdi), %edx
+; SSE2-NEXT:    movzbl 14(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm15
-; SSE2-NEXT:    movzbl 11(%rdi), %edx
-; SSE2-NEXT:    andl $15, %edx
-; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSE2-NEXT:    movd %edx, %xmm9
-; SSE2-NEXT:    movzbl 3(%rdi), %edx
-; SSE2-NEXT:    andl $15, %edx
-; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSE2-NEXT:    movd %edx, %xmm3
 ; SSE2-NEXT:    movzbl 13(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSE2-NEXT:    movd %edx, %xmm9
+; SSE2-NEXT:    movzbl 12(%rdi), %edx
+; SSE2-NEXT:    andl $15, %edx
+; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSE2-NEXT:    movd %edx, %xmm3
+; SSE2-NEXT:    movzbl 11(%rdi), %edx
+; SSE2-NEXT:    andl $15, %edx
+; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm10
-; SSE2-NEXT:    movzbl 5(%rdi), %edx
+; SSE2-NEXT:    movzbl 10(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm7
@@ -848,11 +848,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm11
-; SSE2-NEXT:    movzbl 1(%rdi), %edx
+; SSE2-NEXT:    movzbl 8(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm6
-; SSE2-NEXT:    movzbl 14(%rdi), %edx
+; SSE2-NEXT:    movzbl 7(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm12
@@ -860,23 +860,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm5
-; SSE2-NEXT:    movzbl 10(%rdi), %edx
+; SSE2-NEXT:    movzbl 5(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm13
-; SSE2-NEXT:    movzbl 2(%rdi), %edx
-; SSE2-NEXT:    andl $15, %edx
-; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSE2-NEXT:    movd %edx, %xmm4
-; SSE2-NEXT:    movzbl 12(%rdi), %edx
-; SSE2-NEXT:    andl $15, %edx
-; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSE2-NEXT:    movd %edx, %xmm14
 ; SSE2-NEXT:    movzbl 4(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSE2-NEXT:    movd %edx, %xmm4
+; SSE2-NEXT:    movzbl 3(%rdi), %edx
+; SSE2-NEXT:    andl $15, %edx
+; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSE2-NEXT:    movd %edx, %xmm14
+; SSE2-NEXT:    movzbl 2(%rdi), %edx
+; SSE2-NEXT:    andl $15, %edx
+; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm1
-; SSE2-NEXT:    movzbl 8(%rdi), %edx
+; SSE2-NEXT:    movzbl 1(%rdi), %edx
 ; SSE2-NEXT:    andl $15, %edx
 ; SSE2-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSE2-NEXT:    movd %edx, %xmm2
@@ -885,19 +885,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -909,23 +909,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSSE3-NEXT:    leaq -{{[0-9]+}}(%rsp), %rcx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm8
-; SSSE3-NEXT:    movzbl 7(%rdi), %edx
+; SSSE3-NEXT:    movzbl 14(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm15
-; SSSE3-NEXT:    movzbl 11(%rdi), %edx
-; SSSE3-NEXT:    andl $15, %edx
-; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSSE3-NEXT:    movd %edx, %xmm9
-; SSSE3-NEXT:    movzbl 3(%rdi), %edx
-; SSSE3-NEXT:    andl $15, %edx
-; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSSE3-NEXT:    movd %edx, %xmm3
 ; SSSE3-NEXT:    movzbl 13(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSSE3-NEXT:    movd %edx, %xmm9
+; SSSE3-NEXT:    movzbl 12(%rdi), %edx
+; SSSE3-NEXT:    andl $15, %edx
+; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSSE3-NEXT:    movd %edx, %xmm3
+; SSSE3-NEXT:    movzbl 11(%rdi), %edx
+; SSSE3-NEXT:    andl $15, %edx
+; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm10
-; SSSE3-NEXT:    movzbl 5(%rdi), %edx
+; SSSE3-NEXT:    movzbl 10(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm7
@@ -933,11 +933,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm11
-; SSSE3-NEXT:    movzbl 1(%rdi), %edx
+; SSSE3-NEXT:    movzbl 8(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm6
-; SSSE3-NEXT:    movzbl 14(%rdi), %edx
+; SSSE3-NEXT:    movzbl 7(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm12
@@ -945,23 +945,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm5
-; SSSE3-NEXT:    movzbl 10(%rdi), %edx
+; SSSE3-NEXT:    movzbl 5(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm13
-; SSSE3-NEXT:    movzbl 2(%rdi), %edx
-; SSSE3-NEXT:    andl $15, %edx
-; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSSE3-NEXT:    movd %edx, %xmm4
-; SSSE3-NEXT:    movzbl 12(%rdi), %edx
-; SSSE3-NEXT:    andl $15, %edx
-; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
-; SSSE3-NEXT:    movd %edx, %xmm14
 ; SSSE3-NEXT:    movzbl 4(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSSE3-NEXT:    movd %edx, %xmm4
+; SSSE3-NEXT:    movzbl 3(%rdi), %edx
+; SSSE3-NEXT:    andl $15, %edx
+; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
+; SSSE3-NEXT:    movd %edx, %xmm14
+; SSSE3-NEXT:    movzbl 2(%rdi), %edx
+; SSSE3-NEXT:    andl $15, %edx
+; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm1
-; SSSE3-NEXT:    movzbl 8(%rdi), %edx
+; SSSE3-NEXT:    movzbl 1(%rdi), %edx
 ; SSSE3-NEXT:    andl $15, %edx
 ; SSSE3-NEXT:    movzbl (%rdx,%rcx), %edx
 ; SSSE3-NEXT:    movd %edx, %xmm2
@@ -970,19 +970,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
 ; SSSE3-NEXT:    movd %eax, %xmm0
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -1225,28 +1225,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
 ; SSE2-NEXT:    andl $7, %ecx
 ; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    andl $7, %r8d
-; SSE2-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; SSE2-NEXT:    andl $7, %r9d
 ; SSE2-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT:    movzwl -24(%rsp,%r9,2), %eax
-; SSE2-NEXT:    movd %eax, %xmm2
-; SSE2-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
-; SSE2-NEXT:    movd %eax, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
 ; SSE2-NEXT:    movzwl -40(%rsp,%rdx,2), %eax
-; SSE2-NEXT:    movd %eax, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT:    movzwl -40(%rsp,%r8,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
+; SSE2-NEXT:    movd %eax, %xmm2
 ; SSE2-NEXT:    movzwl -40(%rsp,%rdi,2), %eax
 ; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    movzwl -24(%rsp,%r9,2), %eax
+; SSE2-NEXT:    movd %eax, %xmm1
+; SSE2-NEXT:    movzwl -40(%rsp,%r8,2), %eax
+; SSE2-NEXT:    movd %eax, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
@@ -1263,28 +1262,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
 ; SSSE3-NEXT:    andl $7, %ecx
 ; SSSE3-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
 ; SSSE3-NEXT:    andl $7, %r8d
-; SSSE3-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; SSSE3-NEXT:    andl $7, %r9d
 ; SSSE3-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    pxor %xmm1, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT:    movzwl -24(%rsp,%r9,2), %eax
-; SSSE3-NEXT:    movd %eax, %xmm2
-; SSSE3-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
-; SSSE3-NEXT:    movd %eax, %xmm3
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
 ; SSSE3-NEXT:    movzwl -40(%rsp,%rdx,2), %eax
-; SSSE3-NEXT:    movd %eax, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT:    movzwl -40(%rsp,%r8,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm1
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    movzwl -24(%rsp,%rsi,2), %eax
+; SSSE3-NEXT:    movd %eax, %xmm2
 ; SSSE3-NEXT:    movzwl -40(%rsp,%rdi,2), %eax
 ; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    movzwl -24(%rsp,%r9,2), %eax
+; SSSE3-NEXT:    movd %eax, %xmm1
+; SSSE3-NEXT:    movzwl -40(%rsp,%r8,2), %eax
+; SSSE3-NEXT:    movd %eax, %xmm2
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
diff --git a/test/CodeGen/X86/vector-sqrt.ll b/test/CodeGen/X86/vector-sqrt.ll
index 13088b7fa5f2..c5ac4466b5fa 100644
--- a/test/CodeGen/X86/vector-sqrt.ll
+++ b/test/CodeGen/X86/vector-sqrt.ll
@@ -5,10 +5,8 @@
 define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtd2:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtsd 8(%rdi), %xmm1, %xmm1
 ; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 entry:
@@ -29,14 +27,10 @@ declare double @sqrt(double) local_unnamed_addr #1
 define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtf4:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vsqrtss %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vsqrtss %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtss 4(%rdi), %xmm1, %xmm1
+; CHECK-NEXT:    vsqrtss 8(%rdi), %xmm2, %xmm2
+; CHECK-NEXT:    vsqrtss 12(%rdi), %xmm3, %xmm3
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
diff --git a/test/CodeGen/X86/vector-unsigned-cmp.ll b/test/CodeGen/X86/vector-unsigned-cmp.ll
index fc246669992c..3e4b9aedf2b8 100644
--- a/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -13,7 +13,7 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm2
@@ -30,9 +30,6 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@@ -46,7 +43,7 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm2
@@ -63,9 +60,6 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@@ -79,7 +73,7 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm2
@@ -98,9 +92,6 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
@@ -116,7 +107,7 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm2
@@ -135,9 +126,6 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
@@ -153,31 +141,15 @@ define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrld $1, %xmm0
 ; SSE-NEXT:    psrld $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT:    pxor %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: ugt_v4i32:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: ugt_v4i32:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    retq
+; AVX-LABEL: ugt_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
   %cmp = icmp ugt <4 x i32> %sh1, %sh2
@@ -189,32 +161,16 @@ define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrld $1, %xmm0
 ; SSE-NEXT:    psrld $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm2
-; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: ult_v4i32:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: ult_v4i32:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    retq
+; AVX-LABEL: ult_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
   %cmp = icmp ult <4 x i32> %sh1, %sh2
@@ -226,12 +182,9 @@ define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    psrld $1, %xmm0
 ; SSE2-NEXT:    psrld $1, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: uge_v4i32:
@@ -260,9 +213,6 @@ define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE2:       # BB#0:
 ; SSE2-NEXT:    psrld $1, %xmm0
 ; SSE2-NEXT:    psrld $1, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm0
 ; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm0
@@ -294,9 +244,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT:    pxor %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -304,9 +251,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -320,20 +264,14 @@ define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm2
-; SSE-NEXT:    pcmpgtw %xmm0, %xmm2
-; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ult_v8i16:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -408,22 +346,20 @@ define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; SSE-LABEL: ugt_v16i8:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT:    por %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm1
 ; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ugt_v16i8:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
-; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -436,11 +372,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; SSE-LABEL: ult_v16i8:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm1, %xmm2
 ; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
 ; SSE-NEXT:    movdqa %xmm2, %xmm0
 ; SSE-NEXT:    retq
@@ -448,11 +383,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; AVX-LABEL: ult_v16i8:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
diff --git a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
index 0eb17fb6c14d..c1d242575253 100644
--- a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
+++ b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
@@ -15,5 +15,5 @@ body:
 ; PRE-RA-NEXT: - { reg: '%esi', virtual-reg: '%1' }
 
 ; POST-RA: liveins:
-; POST-RA-NEXT: - { reg: '%edi' }
-; POST-RA-NEXT: - { reg: '%esi' }
+; POST-RA-NEXT: - { reg: '%edi', virtual-reg: '' }
+; POST-RA-NEXT: - { reg: '%esi', virtual-reg: '' }
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index c9a34de12369..a31adc337906 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 ; X32-LABEL: shift1b:
 ; X32:       # BB#0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT:    psllq %xmm2, %xmm0
+; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT:    psllq %xmm1, %xmm0
 ; X32-NEXT:    movdqa %xmm0, (%eax)
 ; X32-NEXT:    retl
 ;
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 88cba8a4d6ac..a381637b40a9 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
 ; X32-LABEL: shift1b:
 ; X32:       # BB#0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT:    psrlq %xmm2, %xmm0
+; X32-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT:    psrlq %xmm1, %xmm0
 ; X32-NEXT:    movdqa %xmm0, (%eax)
 ; X32-NEXT:    retl
 ;
diff --git a/test/CodeGen/X86/x86-interleaved-access.ll b/test/CodeGen/X86/x86-interleaved-access.ll
index 4181a374c61c..74214aa1b8b7 100644
--- a/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/test/CodeGen/X86/x86-interleaved-access.ll
@@ -135,3 +135,96 @@ define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
   %add3 = add <4 x i64> %add2, %strided.v3
   ret <4 x i64> %add3
 }
+
+define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; AVX-LABEL: store_factorf64_4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm4 = xmm2[0],xmm3[0]
+; AVX-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm4
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm5 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3]
+; AVX-NEXT:    vunpckhpd {{.*#+}} xmm5 = xmm2[1],xmm3[1]
+; AVX-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX-NEXT:    vunpckhpd {{.*#+}} xmm6 = xmm0[1],xmm1[1]
+; AVX-NEXT:    vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3]
+; AVX-NEXT:    vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX-NEXT:    vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX-NEXT:    vextractf128 $1, %ymm7, %xmm7
+; AVX-NEXT:    vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3]
+; AVX-NEXT:    vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3]
+; AVX-NEXT:    vmovupd %ymm0, 96(%rdi)
+; AVX-NEXT:    vmovupd %ymm6, 64(%rdi)
+; AVX-NEXT:    vmovupd %ymm5, 32(%rdi)
+; AVX-NEXT:    vmovupd %ymm4, (%rdi)
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+  ret void
+}
+
+define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
+; AVX1-LABEL: store_factori64_4:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm4 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm4
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm5 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3]
+; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm5 = xmm2[1],xmm3[1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT:    vpunpckhqdq {{.*#+}} xmm6 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3]
+; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3]
+; AVX1-NEXT:    vmovupd %ymm0, 96(%rdi)
+; AVX1-NEXT:    vmovupd %ymm6, 64(%rdi)
+; AVX1-NEXT:    vmovupd %ymm5, 32(%rdi)
+; AVX1-NEXT:    vmovupd %ymm4, (%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: store_factori64_4:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm4 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm5
+; AVX2-NEXT:    vpermq {{.*#+}} ymm6 = ymm1[0,2,2,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm5 = xmm5[0,1],xmm6[2,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
+; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm5 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm6
+; AVX2-NEXT:    vpermq {{.*#+}} ymm7 = ymm0[3,1,2,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm6
+; AVX2-NEXT:    vpbroadcastq %xmm3, %ymm7
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm7[6,7]
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm7 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm6 = ymm7[0,1,2,3],ymm6[4,5,6,7]
+; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,1,1,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7]
+; AVX2-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT:    vmovdqu %ymm0, 32(%rdi)
+; AVX2-NEXT:    vmovdqu %ymm6, (%rdi)
+; AVX2-NEXT:    vmovdqu %ymm5, 96(%rdi)
+; AVX2-NEXT:    vmovdqu %ymm4, 64(%rdi)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
+  ret void
+}
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s
new file mode 100644
index 000000000000..8a9c03b77c0d
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-dwp.s
@@ -0,0 +1,277 @@
+# Test object to verify that dwarfdump handles dwp files with DWARF v5 string
+# offset tables. We have 2 CUs and 2 TUs, where it is assumed that 
+# CU1 and TU1 came from one object file, CU2 and TU2 from a second object
+# file.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-dwp.s -filetype=obj \
+#         -o dwarfdump-str-offsets-dwp.x86_64.o
+
+        .section .debug_str.dwo,"MS",@progbits,1
+str_producer:
+        .asciz "Handmade DWARF producer"
+str_CU1:
+        .asciz "Compile_Unit_1"
+str_CU1_dir:
+        .asciz "/home/test/CU1"
+str_CU2:
+        .asciz "Compile_Unit_2"
+str_CU2_dir:
+        .asciz "/home/test/CU2"
+str_TU1:
+        .asciz "Type_Unit_1"
+str_TU1_type:
+        .asciz "MyStruct_1"
+str_TU2:
+        .asciz "Type_Unit_2"
+str_TU2_type:
+        .asciz "MyStruct_2"
+
+        .section .debug_str_offsets.dwo,"",@progbits
+# Object file 1's portion of the .debug_str_offsets.dwo section.
+.debug_str_offsets_object_file1:
+
+# CU1's contribution (from object file 1)
+.debug_str_offsets_start_CU1:
+        .long .debug_str_offsets_end_CU1-.debug_str_offsets_base_CU1
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base_CU1:
+        .long str_producer-.debug_str.dwo
+        .long str_CU1-.debug_str.dwo
+        .long str_CU1_dir-.debug_str.dwo
+.debug_str_offsets_end_CU1:
+
+# TU1's contribution (from object file 1)
+.debug_str_offsets_start_TU1:
+        .long .debug_str_offsets_end_TU1-.debug_str_offsets_base_TU1
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base_TU1:
+        .long str_TU1-.debug_str.dwo
+        .long str_TU1_type-.debug_str.dwo
+.debug_str_offsets_end_TU1:
+
+# Object file 2's portion of the .debug_str_offsets.dwo section.
+.debug_str_offsets_object_file2:
+
+# CU2's contribution (from object file 2)
+.debug_str_offsets_start_CU2:
+        .long .debug_str_offsets_end_CU2-.debug_str_offsets_base_CU2
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base_CU2:
+        .long str_producer-.debug_str.dwo
+        .long str_CU2-.debug_str.dwo
+        .long str_CU2_dir-.debug_str.dwo
+.debug_str_offsets_end_CU2:
+
+# TU2's contribution (from object file 2)
+.debug_str_offsets_start_TU2:
+        .long .debug_str_offsets_end_TU2-.debug_str_offsets_base_TU2
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base_TU2:
+        .long str_TU2-.debug_str.dwo
+        .long str_TU2_type-.debug_str.dwo
+.debug_str_offsets_end_TU2:
+
+
+# Abbrevs are shared for all compile and type units
+        .section .debug_abbrev.dwo,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x25  # DW_AT_producer
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x02  # Abbrev code
+        .byte 0x41  # DW_TAG_type_unit
+        .byte 0x01  # DW_CHILDREN_yes
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x03  # Abbrev code
+        .byte 0x13  # DW_TAG_structure_type
+        .byte 0x00  # DW_CHILDREN_no (no members)
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+abbrev_end:
+
+        .section .debug_info.dwo,"",@progbits
+
+# DWARF v5 CU header.
+CU1_5_start:
+        .long  CU1_5_end-CU1_5_version  # Length of Unit
+CU1_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+# The DW_AT_str_offsets_base attribute for CU1 contains the offset of CU1's
+# contribution relative to the start of object file 1's portion of the
+# .debug_str_offsets section.
+        .long .debug_str_offsets_base_CU1-.debug_str_offsets_object_file1
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU1_5_end:
+
+CU2_5_start:
+        .long  CU2_5_end-CU2_5_version  # Length of Unit
+CU2_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+# The DW_AT_str_offsets_base attribute for CU2 contains the offset of CU2's
+# contribution relative to the start of object file 2's portion of the
+# .debug_str_offsets section.
+        .long .debug_str_offsets_base_CU2-.debug_str_offsets_object_file2
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU2_5_end:
+
+        .section .debug_types.dwo,"",@progbits
+# DWARF v5 Type unit header.
+TU1_5_start:
+        .long  TU1_5_end-TU1_5_version  # Length of Unit
+TU1_5_version:
+        .short 5               # DWARF version number
+        .byte 2                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo    # Offset Into Abbrev. Section
+        .quad 0x0011223344556677 # Type Signature
+        .long TU1_5_type-TU1_5_start # Type offset
+# The type-unit DIE, which has a name.
+        .byte 2                # Abbreviation code
+        .byte 0                # Index of the unit type name string
+# The DW_AT_str_offsets_base attribute for TU1 contains the offset of TU1's
+# contribution relative to the start of object file 1's portion of the
+# .debug_str_offsets section.
+        .long .debug_str_offsets_base_TU1-.debug_str_offsets_object_file1
+# The type DIE, which has a name.
+TU1_5_type:
+        .byte 3                # Abbreviation code
+        .byte 1                # Index of the type name string
+        .byte 0 # NULL
+        .byte 0 # NULL
+TU1_5_end:
+
+TU2_5_start:
+        .long  TU2_5_end-TU2_5_version  # Length of Unit
+TU2_5_version:
+        .short 5               # DWARF version number
+        .byte 2                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo    # Offset Into Abbrev. Section
+        .quad 0x00aabbccddeeff99 # Type Signature
+        .long TU2_5_type-TU2_5_start # Type offset
+# The type-unit DIE, which has a name.
+        .byte 2                # Abbreviation code
+        .byte 0                # Index of the unit type name string
+# The DW_AT_str_offsets_base attribute for TU2 contains the offset of TU2's
+# contribution relative to the start of object file 2's portion of the
+# .debug_str_offsets section.
+        .long .debug_str_offsets_base_TU2-.debug_str_offsets_object_file2
+# The type DIE, which has a name.
+TU2_5_type:
+        .byte 3                # Abbreviation code
+        .byte 1                # Index of the type name string
+        .byte 0 # NULL
+        .byte 0 # NULL
+TU2_5_end:
+
+        .section .debug_cu_index,"",@progbits
+        # The index header
+        .long 2                # Version 
+        .long 3                # Columns of contribution matrix
+        .long 2                # number of units
+        .long 2                # number of hash buckets in table
+
+        # The signatures for both CUs.
+        .quad 0xddeeaaddbbaabbee # signature 1
+        .quad 0xff00ffeeffaaff00 # signature 2
+        # The indexes for both CUs.
+        .long 1                # index 1
+        .long 2                # index 2
+        # The sections to which both CUs contribute.
+        .long 1                # DW_SECT_INFO
+        .long 3                # DW_SECT_ABBREV
+        .long 6                # DW_SECT_STR_OFFSETS
+
+        # The starting offsets of both CUs' contributions to info,
+        # abbrev and string offsets table.
+        .long CU1_5_start-.debug_info.dwo                   
+        .long 0
+        .long .debug_str_offsets_object_file1-.debug_str_offsets.dwo
+        .long CU2_5_start-.debug_info.dwo
+        .long 0
+        .long .debug_str_offsets_object_file2-.debug_str_offsets.dwo
+
+        # The lengths of both CUs' contributions to info, abbrev and
+        # string offsets table.
+        .long CU1_5_end-CU1_5_start
+        .long abbrev_end-.debug_abbrev.dwo
+        .long .debug_str_offsets_end_CU1-.debug_str_offsets_start_CU1
+        .long CU2_5_end-CU2_5_start
+        .long abbrev_end-.debug_abbrev.dwo
+        .long .debug_str_offsets_end_CU2-.debug_str_offsets_start_CU2
+
+        .section .debug_tu_index,"",@progbits
+        # The index header
+        .long 2                # Version 
+        .long 3                # Columns of contribution matrix
+        .long 2                # number of units
+        .long 2                # number of hash buckets in table
+
+        # The signatures for both TUs.
+        .quad 0xeeaaddbbaabbeedd # signature 1
+        .quad 0x00ffeeffaaff00ff # signature 2
+        # The indexes for both TUs.
+        .long 1                # index 1
+        .long 2                # index 2
+        # The sections to which both TUs contribute.
+        .long 2                # DW_SECT_TYPES
+        .long 3                # DW_SECT_ABBREV
+        .long 6                # DW_SECT_STR_OFFSETS
+
+        # The starting offsets of both TUs' contributions to types,
+        # abbrev and string offsets table.
+        .long TU1_5_start-.debug_types.dwo
+        .long 0
+        .long .debug_str_offsets_object_file1-.debug_str_offsets.dwo
+        .long TU2_5_start-.debug_types.dwo
+        .long 0
+        .long .debug_str_offsets_object_file2-.debug_str_offsets.dwo
+
+        # The lengths of both TUs' contributions to types, abbrev and
+        # string offsets table.
+        .long TU1_5_end-TU1_5_start
+        .long abbrev_end-.debug_abbrev.dwo
+        .long .debug_str_offsets_end_TU1-.debug_str_offsets_start_TU1
+        .long TU2_5_end-TU2_5_start
+        .long abbrev_end-.debug_abbrev.dwo
+        .long .debug_str_offsets_end_TU2-.debug_str_offsets_start_TU2
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s
new file mode 100644
index 000000000000..361448af0e87
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.s
@@ -0,0 +1,34 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-1.s -filetype=obj \
+#         -o dwarfdump-str-offsets-invalid-1.x86_64.o
+#
+# A rudimentary abbrev section.
+        .section .debug_abbrev,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a 
+# DWARF v5 string offsets table.
+        .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+        .long  CU1_5_end-CU1_5_version  # Length of Unit
+CU1_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+        .byte 1                # Abbreviation code
+CU1_5_end:
+
+        .section .debug_str_offsets,"",@progbits
+# A degenerate section, not enough for a single contribution size.
+        .byte 2
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..65aae84c22f4275cb510243d797f73e76f7e2e4d
GIT binary patch
literal 824
zcmb_a!AiqG5S?wStpyPef}hY!10Dpulz><-UWy3zlqPf|1x+AXi=O-~Kf_OQ-gXzm
zquzWWGnqH<%`Ut7yuF|IjFAW<Gx@C|D{|E#Q%|QdQl&XXais5om=V5B5AiSD`Nlww
zNpKzb0r4pN`Y`-bN{)TM(@oa4cze$BqG;?^5@+9?1g>>WR+nXKT`NgrU-H0js<M`w
z$BXPT+uEkx)K#c#wU$I(E-y5HydwV-7QF8;_dDdA*6AF-@0@4^cSyRF>R9r*!pk$E
zO%W74m$g&XFbCfpLBaRS+KFm-Pw;&qD9(3DhMJ*^_Z|5&<#;A64+skGalKT{z903;
MyG2$Z!?$n!A3-@PGynhq

literal 0
HcmV?d00001

diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s
new file mode 100644
index 000000000000..2f0fdfce2438
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.s
@@ -0,0 +1,36 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-2.s -filetype=obj \
+#         -o dwarfdump-str-offsets-invalid-2.x86_64.o
+
+# A rudimentary abbrev section.
+        .section .debug_abbrev,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a
+# DWARF v5 string offsets table.
+        .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+        .long  CU1_5_end-CU1_5_version  # Length of Unit
+CU1_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+        .byte 1                # Abbreviation code
+CU1_5_end:
+
+        .section .debug_str_offsets,"",@progbits
+# A degenerate section with fewer bytes than required for a DWARF64 size.
+        .long 0xffffffff
+        .long 0
+        .short 4
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..90d2074f19ffc0d4a94685a94c99d56cd02ab0e8
GIT binary patch
literal 832
zcmb_a!AiqG5S?wSwuK@+2!4VW10H(tQWnH|=|#kYr<l-<6f}WkE%oHD`5FF=&P#VO
zKI+W}nYZ)i&Fqrh_v_`NXN*J`nafWWRT0|}Z7W+Ds?$u7jPyGYGsN0_DX<tI{Dqsg
zFpvouE`z?Wxi5Wki=JVJvC^+<vU2(BQ<~*@?KYBlxAhXV#@A_86pizZB(;0ag1#<`
zO0MoN(pkE3b+fL@khyXtiFWxcSAKf|{}XQc++pr#;EeX^Ttizq)(Gv8e3tT<@_$7y
z&xF?^D0nV=&(*^k`W6TZzF+p9sfW*mz7GV&>5k+`8NPVmQ9n_QXQJf}L4kMC1$!T~
P*y|d?`jHrY`}Y3^E+H;k

literal 0
HcmV?d00001

diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s
new file mode 100644
index 000000000000..b4355fe27f75
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.s
@@ -0,0 +1,88 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-3.s -filetype=obj \
+#         -o dwarfdump-str-offsets-invalid-3.x86_64.o
+
+        .section .debug_str,"MS",@progbits,1
+str_producer:
+        .asciz "Handmade DWARF producer"
+str_CU1:
+        .asciz "Compile_Unit_1"
+str_CU1_dir:
+        .asciz "/home/test/CU1"
+str_CU2:
+        .asciz "Compile_Unit_2"
+str_CU2_dir:
+        .asciz "/home/test/CU2"
+str_TU:
+        .asciz "Type_Unit"
+str_TU_type:
+        .asciz "MyStruct"
+
+        .section .debug_str.dwo,"MS",@progbits,1
+dwo_str_CU_5_producer:
+        .asciz "Handmade split DWARF producer"
+dwo_str_CU_5_name:
+        .asciz "V5_split_compile_unit"
+dwo_str_CU_5_comp_dir:
+        .asciz "/home/test/splitCU"
+dwo_str_TU_5:
+        .asciz "V5_split_type_unit"
+dwo_str_TU_5_type:
+        .asciz "V5_split_Mystruct"
+
+# A rudimentary abbrev section.
+        .section .debug_abbrev,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a
+# DWARF v5 string offsets table.
+        .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+        .long  CU1_5_end-CU1_5_version  # Length of Unit
+CU1_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+        .byte 1                # Abbreviation code
+CU1_5_end:
+
+        .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+# Invalid length: 0xfffffffe is a reserved DWARF initial-length value.
+        .long 0xfffffffe
+        .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base0:
+        .long str_producer
+        .long str_CU1
+        .long str_CU1_dir
+.debug_str_offsets_segment0_end:
+# CU2's contribution
+        .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base1:
+        .long str_producer
+        .long str_CU2
+        .long str_CU2_dir
+.debug_str_offsets_segment1_end:
+# The TU's contribution
+        .long .debug_str_offsets_segment2_end-.debug_str_offsets_base2
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base2:
+        .long str_TU
+        .long str_TU_type
+.debug_str_offsets_segment2_end:
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..68f8c5f3be02767577c03608048b899ee8c88d46
GIT binary patch
literal 2296
zcmbW2JCD;q5XZ-t+$p>SC*Bk&1%fLeIY$ItflyGmgMvc>Vh|L!a^g*5DYhfqIY>oK
zgQyS<HBeDri^Lb8;WJS41qi$ASw99JCt=Ci-QWCYXLho?ezkdjYsoM`KL*@_Y@{i`
z#!A-jsBQ<22<1K7cRbtS^*c{*KiaBK!@!vhcnICVo4OOuEZ>cp3C(ff@n+1UxY@PL
zqP11DwxBnkCXTQ@e;kLiL7at&rV}@&!Jf34;%tCXXCi8TR%Pv4kP2fNLa9=;J&%%D
z#;RQ3A@MGQQImb+_hPX)s(>73PZ^UQ%bDbW-kvIU4dhJu=N<BT9*X`9`E|lg<mU)q
zMShj=Uus-~n}q*V;}tkY_-Eu*!Z*~o3N)V|$S)Fogq&a)t30c4ml(dG{yE`al)eIV
zy@{>@SBU-%>TeMKK#eP~Nw|&r9^rea-ywX6oM5DtG7OcHeWLc8P#+fWnVgs2ACjNW
z@e|nBT-W0i;kq7m!f^<f2-kI5BV5->|KB>Uu2ZrflKf?!X`LDl@6Sfe?)O8!3yqk+
zjG+<oiH!zv_*gI;Mm&xnL*Tp!m^5`Q)>g-xIc^9EY32H6+E1^bf@?~%xf?=INWt}o
z0f_5KMB3$H<OaS9)U%kpF49!QxLqVFIp6j;dUM~&twAq~MSSFOKW?#q+sy;)>xo^|
zAh!F^h~}PbWl4HgA<42F`gX)yx_<i!Oca>=3D)(t`S13jIA3L_VvHBecXj@<>B$g|
z3zKBI&yo^%)bHOv^xnEAB+{R*q=T*_Y;~jK>-M!Ul=*)|hlzH?R{P~!-Oo!;BBbl0
z+7H5sGC9wAg!KNV+V?_BYa;#M;-8}L5qZ94F8W^5{DNR~PJ!SuLi$~yT6s5=#s7ry
hzskwPBjd~1H2yO&;gS@PC;5&5tf5iHJu6=7{|EN^7$*P#

literal 0
HcmV?d00001

diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s
new file mode 100644
index 000000000000..8ec288151eca
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.s
@@ -0,0 +1,50 @@
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-4.s -filetype=obj \
+#         -o dwarfdump-str-offsets-invalid-4.x86_64.o
+
+        .section .debug_str,"MS",@progbits,1
+str_producer:
+        .asciz "Handmade DWARF producer"
+str_CU1:
+        .asciz "Compile_Unit_1"
+
+# A rudimentary abbrev section.
+        .section .debug_abbrev,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a
+# DWARF v5 string offsets table.
+        .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+        .long  CU1_5_end-CU1_5_version  # Length of Unit
+CU1_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# A compile-unit DIE, which has no attributes.
+        .byte 1                # Abbreviation code
+CU1_5_end:
+
+# Every unit contributes to the string_offsets table.
+        .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+# The length is not a multiple of 4. Check that we don't read off the
+# end.
+        .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base0:
+        .long str_producer
+        .long str_CU1
+        .byte 0
+.debug_str_offsets_segment0_end:
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..8a17b0e6a5191c751c87159effe5942935f418ed
GIT binary patch
literal 1264
zcmbVM!AiqG5S_HvYEi_4Af9^jWM~h9CyCS+5l=$(xFlv<15Hwrw5S*T0`cV0qi6rX
z-|%ysNoU(-$U$6aXXfpDZ+DZ~Jom2#HOB!n95{tlWl(@=ZIydv>Ol*c7k(1PeyH2$
zw`bRb_B2bw*`3aym&Vg*qSY{o3e^Q?mo%IBYk;#sxw9xJ{mb|)NlhrVmLOrg^tbS^
z%p0(6VU*r5z9#UGvaZ9vz)u(#Fr)P<80sM1VY%-XD(o|uobaaQw#B)@m-})<9n8kc
z4}wh3!7cPd0dA%zK2PLDrqa<U*F_F12nb%j9Gf2{qZE3>t~yq8o#j!QaHW&bGBBPh
z*W*|xMMstMXj}LJxcOsDkyb&kDqY~`x?>&ipXn>=s-F56H|@9ZmA$-!^MtJno+oNk
z$~Sq06Qq|%@mO%k(FC);9ZhaH6vtf1eM#;m@+3ymnDw&XCAW_}c_xzHGtu;!xt*pz
y4Ss-J=}G!0`okgbq2%r%r)vEdCYt9;>(hOZ>%SvreTuMJj5m%y9LbJ6r2ZFW>S2=r

literal 0
HcmV?d00001

diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s
new file mode 100644
index 000000000000..e185e407b630
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.s
@@ -0,0 +1,10 @@
+# Test object to verify that llvm-dwarfdump handles a degenerate string offsets
+# section.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets-invalid-5.s -filetype=obj \
+#         -o dwarfdump-str-offsets-invalid-5.x86_64.o
+# Every unit contributes to the string_offsets table.
+        .section .debug_str_offsets,"",@progbits
+# A degenerate section, not enough for a single entry.
+        .byte 2
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..6cfce83e5655d84d006d1be19c027adffe838d88
GIT binary patch
literal 464
zcmb<-^>JfjWMqH=Mg}_u1P><4z%T*9WN-kp9T-@FDw&YPu`=~aQY%Ur^ioojO4H+u
zON!$2)6$AlONtrvfSi)VBp_Xx3!<q5q=3dSV=*cLDyD@dhwfJpAB5S^)H$H>VO$m%
p14^?4X>>C|QVt*j2+-xw`AR@F*uvBUteb&B0!oAQfiMS}H~^yD6HEXA

literal 0
HcmV?d00001

diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets.s b/test/DebugInfo/Inputs/dwarfdump-str-offsets.s
new file mode 100644
index 000000000000..e0a634c7c4a2
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-str-offsets.s
@@ -0,0 +1,500 @@
+# Test object to verify dwarfdump handles v5 string offset tables.
+# We have 2 v5 CUs, a v5 TU, and a split v5 CU and TU.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets.s -filetype=obj \
+#         -o dwarfdump-str-offsets.x86_64.o
+
+        .section .debug_str,"MS",@progbits,1
+str_producer:
+        .asciz "Handmade DWARF producer"
+str_CU1:
+        .asciz "Compile_Unit_1"
+str_CU1_dir:
+        .asciz "/home/test/CU1"
+str_CU2:
+        .asciz "Compile_Unit_2"
+str_CU2_dir:
+        .asciz "/home/test/CU2"
+str_TU:
+        .asciz "Type_Unit"
+str_TU_type:
+        .asciz "MyStruct"
+
+# Every unit contributes to the string_offsets table.
+        .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+        .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base0:
+        .long str_producer
+        .long str_CU1
+        .long str_CU1_dir
+.debug_str_offsets_segment0_end:
+# CU2's contribution
+        .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base1:
+        .long str_producer
+        .long str_CU2
+        .long str_CU2_dir
+.debug_str_offsets_segment1_end:
+# The TU's contribution
+        .long .debug_str_offsets_segment2_end-.debug_str_offsets_base2
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base2:
+        .long str_TU
+        .long str_TU_type
+.debug_str_offsets_segment2_end:
+
+        .section .debug_str.dwo,"MS",@progbits,1
+dwo_str_CU_5_producer:
+        .asciz "Handmade split DWARF producer"
+dwo_str_CU_5_name:
+        .asciz "V5_split_compile_unit"
+dwo_str_CU_5_comp_dir:
+        .asciz "/home/test/splitCU"
+dwo_str_TU_5:
+        .asciz "V5_split_type_unit"
+dwo_str_TU_5_type:
+        .asciz "V5_split_Mystruct"
+
+        .section .debug_str_offsets.dwo,"",@progbits
+# The split CU's contribution
+        .long .debug_dwo_str_offsets_segment0_end-.debug_dwo_str_offsets_base0
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_dwo_str_offsets_base0:
+        .long dwo_str_CU_5_producer-.debug_str.dwo
+        .long dwo_str_CU_5_name-.debug_str.dwo
+        .long dwo_str_CU_5_comp_dir-.debug_str.dwo
+.debug_dwo_str_offsets_segment0_end:
+# The split TU's contribution
+        .long .debug_dwo_str_offsets_segment1_end-.debug_dwo_str_offsets_base1
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_dwo_str_offsets_base1:
+        .long dwo_str_TU_5-.debug_str.dwo
+        .long dwo_str_TU_5_type-.debug_str.dwo
+.debug_dwo_str_offsets_segment1_end:
+
+# All CUs/TUs use the same abbrev section for simplicity.
+        .section .debug_abbrev,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x25  # DW_AT_producer
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x1b  # DW_AT_comp_dir
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x02  # Abbrev code
+        .byte 0x41  # DW_TAG_type_unit
+        .byte 0x01  # DW_CHILDREN_yes
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x03  # Abbrev code
+        .byte 0x13  # DW_TAG_structure_type
+        .byte 0x00  # DW_CHILDREN_no (no members)
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+
+# And a .dwo copy for the .dwo sections.
+        .section .debug_abbrev.dwo,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x25  # DW_AT_producer
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x1b  # DW_AT_comp_dir
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x02  # Abbrev code
+        .byte 0x41  # DW_TAG_type_unit
+        .byte 0x01  # DW_CHILDREN_yes
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x03  # Abbrev code
+        .byte 0x13  # DW_TAG_structure_type
+        .byte 0x00  # DW_CHILDREN_no (no members)
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+        
+        .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+        .long  CU1_5_end-CU1_5_version  # Length of Unit
+CU1_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+        .long .debug_str_offsets_base0
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU1_5_end:
+
+# DWARF v5 CU header
+        .long  CU2_5_end-CU2_5_version  # Length of Unit
+CU2_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+        .long .debug_str_offsets_base1
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU2_5_end:
+
+        .section .debug_types,"",@progbits
+# DWARF v5 Type unit header.
+TU_5_start:
+        .long  TU_5_end-TU_5_version  # Length of Unit
+TU_5_version:
+        .short 5               # DWARF version number
+        .byte 2                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .quad 0x0011223344556677 # Type Signature
+        .long TU_5_type-TU_5_start # Type offset
+# The type-unit DIE, which has a name.
+        .byte 2                # Abbreviation code
+        .byte 0                # Index of the unit type name string
+        .long .debug_str_offsets_base2  # offset into the str_offsets section
+# The type DIE, which has a name.
+TU_5_type:
+        .byte 3                # Abbreviation code
+        .byte 1                # Index of the type name string
+        .byte 0 # NULL
+        .byte 0 # NULL
+TU_5_end:
+        
+        .section .debug_info.dwo,"",@progbits
+
+# DWARF v5 split CU header.
+        .long  CU_split_5_end-CU_split_5_version  # Length of Unit
+CU_split_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo  # Offset Into Abbrev Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+        .long .debug_dwo_str_offsets_base0-.debug_str_offsets.dwo
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU_split_5_end:
+        
+        .section .debug_types.dwo,"",@progbits
+
+# DWARF v5 split type unit header.
+TU_split_5_start:
+        .long  TU_split_5_end-TU_split_5_version  # Length of Unit
+TU_split_5_version:
+        .short 5               # DWARF version number
+        .byte 6                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo  # Offset Into Abbrev Section
+        .quad 0x8899aabbccddeeff # Type Signature
+        .long TU_split_5_type-TU_split_5_start  # Type offset
+# The type-unit DIE, which has a name.
+        .byte 2                # Abbreviation code
+        .byte 0                # The index of the type unit name string
+        .long .debug_dwo_str_offsets_base1-.debug_str_offsets.dwo 
+# The type DIE, which has a name.
+TU_split_5_type:
+        .byte 3                # Abbreviation code
+        .byte 1                # The index of the type name string
+        .byte 0 # NULL
+        .byte 0 # NULL
+TU_split_5_end:
+# Test object to verify dwarfdump handles v5 string offset tables.
+# We have 2 v5 CUs, a v5 TU, and a split v5 CU and TU.
+#
+# To generate the test object:
+# llvm-mc -triple x86_64-unknown-linux dwarfdump-str-offsets.s -filetype=obj \
+#         -o dwarfdump-str-offsets.x86_64.o
+
+        .section .debug_str,"MS",@progbits,1
+str_producer:
+        .asciz "Handmade DWARF producer"
+str_CU1:
+        .asciz "Compile_Unit_1"
+str_CU1_dir:
+        .asciz "/home/test/CU1"
+str_CU2:
+        .asciz "Compile_Unit_2"
+str_CU2_dir:
+        .asciz "/home/test/CU2"
+str_TU:
+        .asciz "Type_Unit"
+str_TU_type:
+        .asciz "MyStruct"
+
+# Every unit contributes to the string_offsets table.
+        .section .debug_str_offsets,"",@progbits
+# CU1's contribution
+        .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base0:
+        .long str_producer
+        .long str_CU1
+        .long str_CU1_dir
+.debug_str_offsets_segment0_end:
+# CU2's contribution
+        .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base1:
+        .long str_producer
+        .long str_CU2
+        .long str_CU2_dir
+.debug_str_offsets_segment1_end:
+# The TU's contribution
+        .long .debug_str_offsets_segment2_end-.debug_str_offsets_base2
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_str_offsets_base2:
+        .long str_TU
+        .long str_TU_type
+.debug_str_offsets_segment2_end:
+
+        .section .debug_str.dwo,"MS",@progbits,1
+dwo_str_CU_5_producer:
+        .asciz "Handmade split DWARF producer"
+dwo_str_CU_5_name:
+        .asciz "V5_split_compile_unit"
+dwo_str_CU_5_comp_dir:
+        .asciz "/home/test/splitCU"
+dwo_str_TU_5:
+        .asciz "V5_split_type_unit"
+dwo_str_TU_5_type:
+        .asciz "V5_split_Mystruct"
+
+        .section .debug_str_offsets.dwo,"",@progbits
+# The split CU's contribution
+        .long .debug_dwo_str_offsets_segment0_end-.debug_dwo_str_offsets_base0
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_dwo_str_offsets_base0:
+        .long dwo_str_CU_5_producer-.debug_str.dwo
+        .long dwo_str_CU_5_name-.debug_str.dwo
+        .long dwo_str_CU_5_comp_dir-.debug_str.dwo
+.debug_dwo_str_offsets_segment0_end:
+# The split TU's contribution
+        .long .debug_dwo_str_offsets_segment1_end-.debug_dwo_str_offsets_base1
+        .short 5    # DWARF version
+        .short 0    # Padding
+.debug_dwo_str_offsets_base1:
+        .long dwo_str_TU_5-.debug_str.dwo
+        .long dwo_str_TU_5_type-.debug_str.dwo
+.debug_dwo_str_offsets_segment1_end:
+
+# All CUs/TUs use the same abbrev section for simplicity.
+        .section .debug_abbrev,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x25  # DW_AT_producer
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x1b  # DW_AT_comp_dir
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x02  # Abbrev code
+        .byte 0x41  # DW_TAG_type_unit
+        .byte 0x01  # DW_CHILDREN_yes
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x03  # Abbrev code
+        .byte 0x13  # DW_TAG_structure_type
+        .byte 0x00  # DW_CHILDREN_no (no members)
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+
+# And a .dwo copy for the .dwo sections.
+        .section .debug_abbrev.dwo,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x25  # DW_AT_producer
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x1b  # DW_AT_comp_dir
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x02  # Abbrev code
+        .byte 0x41  # DW_TAG_type_unit
+        .byte 0x01  # DW_CHILDREN_yes
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x03  # Abbrev code
+        .byte 0x13  # DW_TAG_structure_type
+        .byte 0x00  # DW_CHILDREN_no (no members)
+        .byte 0x03  # DW_AT_name
+        .byte 0x1a  # DW_FORM_strx
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+        
+        .section .debug_info,"",@progbits
+
+# DWARF v5 CU header.
+        .long  CU1_5_end-CU1_5_version  # Length of Unit
+CU1_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+        .long .debug_str_offsets_base0
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU1_5_end:
+
+# DWARF v5 CU header
+        .long  CU2_5_end-CU2_5_version  # Length of Unit
+CU2_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+        .long .debug_str_offsets_base1
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU2_5_end:
+
+        .section .debug_types,"",@progbits
+# DWARF v5 Type unit header.
+TU_5_start:
+        .long  TU_5_end-TU_5_version  # Length of Unit
+TU_5_version:
+        .short 5               # DWARF version number
+        .byte 2                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .quad 0x0011223344556677 # Type Signature
+        .long TU_5_type-TU_5_start # Type offset
+# The type-unit DIE, which has a name.
+        .byte 2                # Abbreviation code
+        .byte 0                # Index of the unit type name string
+        .long .debug_str_offsets_base2  # offset into the str_offsets section
+# The type DIE, which has a name.
+TU_5_type:
+        .byte 3                # Abbreviation code
+        .byte 1                # Index of the type name string
+        .byte 0 # NULL
+        .byte 0 # NULL
+TU_5_end:
+        
+        .section .debug_info.dwo,"",@progbits
+
+# DWARF v5 split CU header.
+        .long  CU_split_5_end-CU_split_5_version  # Length of Unit
+CU_split_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo  # Offset Into Abbrev Section
+# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
+# DW_AT_str_offsets_base and DW_AT_comp_dir.
+        .byte 1                # Abbreviation code
+        .byte 0                # The index of the producer string
+        .byte 1                # The index of the CU name string
+        .long .debug_dwo_str_offsets_base0-.debug_str_offsets.dwo
+        .byte 2                # The index of the comp dir string
+        .byte 0 # NULL
+CU_split_5_end:
+        
+        .section .debug_types.dwo,"",@progbits
+
+# DWARF v5 split type unit header.
+TU_split_5_start:
+        .long  TU_split_5_end-TU_split_5_version  # Length of Unit
+TU_split_5_version:
+        .short 5               # DWARF version number
+        .byte 6                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev.dwo  # Offset Into Abbrev Section
+        .quad 0x8899aabbccddeeff # Type Signature
+        .long TU_split_5_type-TU_split_5_start  # Type offset
+# The type-unit DIE, which has a name.
+        .byte 2                # Abbreviation code
+        .byte 0                # The index of the type unit name string
+        .long .debug_dwo_str_offsets_base1-.debug_str_offsets.dwo 
+# The type DIE, which has a name.
+TU_split_5_type:
+        .byte 3                # Abbreviation code
+        .byte 1                # The index of the type name string
+        .byte 0 # NULL
+        .byte 0 # NULL
+TU_split_5_end:
diff --git a/test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-str-offsets.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..e15ac1c1542f7f7bac6310a7d7bc6a7d627f1dca
GIT binary patch
literal 4000
zcmbW3O^6&-5XWn#cV^?V#9d=Bu3yWN=o(>mXUvKo3>zkiibja;M5B;)W~XNzm@m3}
z6IT>(vKMb6cu?>lisDTW^rANdxe7rDiGnA=n|h2@)m1%js(X$2V5aN+-oHNH>wd5M
zom0=9E*1))tpLYi+*1m0V`glxv$+ljQ2BIo(C#<e!Q#o6PP}k>aTE^Q+nYfM^<jV1
z?FD{g(2e~nth_$#2P<(9#Vhqjb#k;eIa-5_ol)WlXLeqU!|lx&W-(=HTmf5oY@gAk
z<wc`jH?~EdTl1Own^NsIR_j*7qxA-)!k9{9Rf^8+MA;he$2yMTPwROK?N{(un1>?^
z-a>fSJqrNE69qB>yaV82{}!Y3s3<{!LgK>nO)5ezr0gXqvhq@=aq`inc_3CKSMbo)
zxRJ8Rk`k2J?dBiXu6*~!hr5Z}z2Zjh`EAi7KFt$$6~A)MX`l#hJ1_iB7ovSo_+i7J
z<oHvW_%~TUm5Kj@@KqD%ZQ(ZW6T+8`{!d=#DR|uQ-<i+ALBo5(rwso^_))_z2*2C#
z%gpy@;vW}o<6mVy3ri->gWRXBGTA;BJ>6foofU55f6II>6F-^%oEw7ecSUdGKO)@5
z@32>yH;o(YbJmyn*>w3QqIVhDU-Nw|18q3BeIRjY6T0mXKUW?cav|Eg-1p!i!++-Z
z9z16FTcST@_*bIeF#Juf$Ab%ozb|nN7RtqDhCHyVnMl0MZc%<)CI)YMq;$+5aYpsY
zQLgKxcU-xylkRin8fsAt<+^@{4cGP4eXDw1zhg$P>*v<5wUG37M^Ae}b@w^k)w}hO
zUgx8JRFA8tW1AXr(rewWewhT)_To@!2d(Wbzu9Vq!9}RV!5cAbH01q+@2R*MGElP;
z2EC>@;S0zgb~;fIN8oaLTOmmzqX^oUh8QXI>kWU6UwVGK8$v>Aj$KXd^o2g*n$n4?
zE2wd@X-Nro%H^_*I3wx~I>TJ(uFAzAjJm@C;Czx7`I@f?*KXXYH)<2Ts?(#{qnL_v
zLvvpz&A}!Jui&8BpYRC=ZMPKavS-4ngv6rsWR35Pu9Y7JTm4`V*W7WfLT<5Ys;-7+
z@o(dM#8|yW3>nAhakB-LXs1utWXaN}iRPF$msT?hYWj4kz!pNHst&rdNCt?|7o#jy
zp7*K_T5sKpy7&Ii^&>pL^G@B}=_J}YGswrflZehAVr7Ssxo-N}wI}(jI6(e(F6-j3
zC0D&@e!bE;tF~(B`k8d>ca7CnQD<u+*L^af@2P|B4*Zc$&o_Nf{Dq8S+WM~}qt;6<
z>A&XJ>t!5KI~AevtUx`lb<Aq1Zy9Na0`2#S)qb;w|9k(@{{ht5n3VrZfk&z1G^<@e
zkhlIn%KE=1zO?@I4zlb2JTe=TTz(R`L>;GD?K3!*mw#EFf4#4ipS~4r{uvBtV^X@m
z1={att1ThO%m1?E*XNA#@6XBq8!{Uc5zO*;8Dzd^t@Z|L^YV{!^3S2x&i_-)BdKt7
R+*So%mGgu0K8!!>{|}*^%t!zL

literal 0
HcmV?d00001

diff --git a/test/DebugInfo/PDB/DIA/pdbdump-flags.test b/test/DebugInfo/PDB/DIA/pdbdump-flags.test
index 9233e20ee82e..02291c9afa9c 100644
--- a/test/DebugInfo/PDB/DIA/pdbdump-flags.test
+++ b/test/DebugInfo/PDB/DIA/pdbdump-flags.test
@@ -1,7 +1,7 @@
-; RUN: llvm-pdbdump pretty %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS
-; RUN: llvm-pdbdump pretty -types %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=TYPES
-; RUN: llvm-pdbdump pretty -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS
-; RUN: llvm-pdbdump pretty -types -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE
+; RUN: llvm-pdbutil pretty %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS
+; RUN: llvm-pdbutil pretty -types %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=TYPES
+; RUN: llvm-pdbutil pretty -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS
+; RUN: llvm-pdbutil pretty -types -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE
 
 ; Check that neither symbols nor compilands are dumped when neither argument specified.
 ; NO_ARGS: empty.pdb
diff --git a/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test b/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test
index 2a596e4af149..9556fb72edc5 100644
--- a/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test
+++ b/test/DebugInfo/PDB/DIA/pdbdump-linenumbers.test
@@ -1,5 +1,5 @@
-; RUN: llvm-pdbdump pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS_FPO %s
-; RUN: llvm-pdbdump pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS %s
+; RUN: llvm-pdbutil pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS_FPO %s
+; RUN: llvm-pdbutil pretty -lines %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=LINE_NUMS %s
 
 ; LINE_NUMS_FPO: llvm\test\debuginfo\pdb\inputs\symbolformat-fpo.cpp
 ; LINE_NUMS_FPO: Line 5, Address: [0x000011a0 - 0x000011a5] (6 bytes)
diff --git a/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test b/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
index 997cdd9f6bac..536161586ffc 100644
--- a/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
+++ b/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
@@ -1,11 +1,11 @@
-; RUN: llvm-pdbdump pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT_FPO %s
-; RUN: llvm-pdbdump pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s
-; RUN: llvm-pdbdump pretty -types %p/../Inputs/symbolformat.pdb > %t.types
+; RUN: llvm-pdbutil pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT_FPO %s
+; RUN: llvm-pdbutil pretty -module-syms %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s
+; RUN: llvm-pdbutil pretty -types %p/../Inputs/symbolformat.pdb > %t.types
 ; RUN: FileCheck --check-prefix=TYPES_FORMAT %s < %t.types
 ; RUN: FileCheck --check-prefix=TYPES_1 %s < %t.types
 ; RUN: FileCheck --check-prefix=TYPES_2 %s < %t.types
-; RUN: llvm-pdbdump pretty -types %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s
-; RUN: llvm-pdbdump pretty -globals %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s
+; RUN: llvm-pdbutil pretty -types %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s
+; RUN: llvm-pdbutil pretty -globals %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s
 
 ; The format is func [0x<rva_start>+<prologue_length> - 0x<rva_end>-<epilogue_length>]
 ; SYM_FORMAT_FPO: ---SYMBOLS---
diff --git a/test/DebugInfo/PDB/Inputs/debug-subsections.yaml b/test/DebugInfo/PDB/Inputs/debug-subsections.yaml
new file mode 100644
index 000000000000..ab199d2f21a0
--- /dev/null
+++ b/test/DebugInfo/PDB/Inputs/debug-subsections.yaml
@@ -0,0 +1,91 @@
+DbiStream:       
+  Modules:         
+    - Module:          'Foo.obj'
+      ObjFile:         'Foo.obj'
+      Subsections:     
+        - !CrossModuleExports
+          Exports:         
+            - LocalId:         4852
+              GlobalId:        9283
+            - LocalId:         2147487875
+              GlobalId:        9123
+    - Module:          'Bar.obj'
+      ObjFile:         'Bar.obj'
+      Subsections:     
+        - !CrossModuleExports
+          Exports:         
+            - LocalId:         4265
+              GlobalId:        6097
+            - LocalId:         4297
+              GlobalId:        4677
+        - !CrossModuleImports
+          Imports:         
+            - Module:          'Foo.obj'
+              Imports:         [ 4852, 2147487875 ]
+    - Module:          'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+      ObjFile:         'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+      SourceFiles:
+        - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+      Subsections:
+        - !FileChecksums
+          Checksums:
+            - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+              Kind:            MD5
+              Checksum:        A0A5BD0D3ECD93FC29D19DE826FBF4BC
+            - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+              Kind:            MD5
+              Checksum:        1154D69F5B2650196E1FC34F4134E56B
+        - !Lines
+          CodeSize:        10
+          Flags:           [  ]
+          RelocOffset:     100016
+          RelocSegment:    1
+          Blocks:
+            - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+              Lines:
+                - Offset:          0
+                  LineStart:       5
+                  IsStatement:     true
+                  EndDelta:        0
+                - Offset:          3
+                  LineStart:       6
+                  IsStatement:     true
+                  EndDelta:        0
+                - Offset:          8
+                  LineStart:       7
+                  IsStatement:     true
+                  EndDelta:        0
+              Columns:
+        - !InlineeLines
+          HasExtraFiles:   false
+          Sites:           
+            - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+              LineNum:         26950
+              Inlinee:         22767
+    # The following subsections don't normally appear in PDB files, but we test
+    # them anyway
+    - Module:          'ObjFileSubsections'
+      ObjFile:         'ObjFileSubsections'
+      Subsections:
+        - !StringTable
+          Strings:
+            - 'String1'
+            - 'String2'
+            - 'String3'
+        - !Symbols
+          Records:
+            - Kind:            S_OBJNAME
+              ObjNameSym:
+                Signature:       0
+                ObjectName:      'ObjFileSubsections'
+        - !FrameData
+          Frames:
+            - CodeSize:           1
+              FrameFunc:          'MyFunc'
+              LocalSize:          2
+              MaxStackSize:       3
+              ParamsSize:         4
+              PrologSize:         5
+              RvaStart:           6
+              SavedRegsSize:      7
+...
diff --git a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
deleted file mode 100644
index d1324d26d8bb..000000000000
--- a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
----
-DbiStream:
-  Modules:
-    - Module:          'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-      ObjFile:         'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-      SourceFiles:
-        - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-      Subsections:
-        - !FileChecksums
-          Checksums:
-            - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-              Kind:            MD5
-              Checksum:        A0A5BD0D3ECD93FC29D19DE826FBF4BC
-            - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
-              Kind:            MD5
-              Checksum:        1154D69F5B2650196E1FC34F4134E56B
-        - !Lines
-          CodeSize:        10
-          Flags:           [  ]
-          RelocOffset:     16
-          RelocSegment:    1
-          Blocks:
-            - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-              Lines:
-                - Offset:          0
-                  LineStart:       5
-                  IsStatement:     true
-                  EndDelta:        0
-                - Offset:          3
-                  LineStart:       6
-                  IsStatement:     true
-                  EndDelta:        0
-                - Offset:          8
-                  LineStart:       7
-                  IsStatement:     true
-                  EndDelta:        0
-              Columns:
-        - !InlineeLines
-          HasExtraFiles:   false
-          Sites:           
-            - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
-              LineNum:         26950
-              Inlinee:         22767
-...
diff --git a/test/DebugInfo/PDB/Native/pdb-native-compilands.test b/test/DebugInfo/PDB/Native/pdb-native-compilands.test
index 38234d719e50..2c7011c65708 100644
--- a/test/DebugInfo/PDB/Native/pdb-native-compilands.test
+++ b/test/DebugInfo/PDB/Native/pdb-native-compilands.test
@@ -1,7 +1,7 @@
 ; Test that the native PDB reader can enumerate the compilands.
-; RUN: llvm-pdbdump pretty -native -compilands %p/../Inputs/empty.pdb \
+; RUN: llvm-pdbutil pretty -native -compilands %p/../Inputs/empty.pdb \
 ; RUN:   | FileCheck -check-prefix=EMPTY %s
-; RUN: llvm-pdbdump pretty -native -compilands %p/../Inputs/big-read.pdb \
+; RUN: llvm-pdbutil pretty -native -compilands %p/../Inputs/big-read.pdb \
 ; RUN:   | FileCheck -check-prefix=BIGREAD %s
 
 ; Reference output was generated with the DIA reader to ensure that the
diff --git a/test/DebugInfo/PDB/Native/pdb-native-summary.test b/test/DebugInfo/PDB/Native/pdb-native-summary.test
index bd32f198a390..116d2564fda3 100644
--- a/test/DebugInfo/PDB/Native/pdb-native-summary.test
+++ b/test/DebugInfo/PDB/Native/pdb-native-summary.test
@@ -1,5 +1,5 @@
 ; Test that the native PDB reader gets the PDB summary correct.
-; RUN: llvm-pdbdump pretty -native -color-output=false %p/../Inputs/empty.pdb \
+; RUN: llvm-pdbutil pretty -native -color-output=false %p/../Inputs/empty.pdb \
 ; RUN:   | FileCheck -check-prefix=EMPTY %s
 
 ; Reference output was generated with the DIA reader to ensure that the
diff --git a/test/DebugInfo/PDB/pdb-longname-truncation.test b/test/DebugInfo/PDB/pdb-longname-truncation.test
index 2e0284fbe916..06eae8ea226d 100644
--- a/test/DebugInfo/PDB/pdb-longname-truncation.test
+++ b/test/DebugInfo/PDB/pdb-longname-truncation.test
@@ -1,3 +1,3 @@
 ; For now just verify that this doesn't cause an error.  Later we pdbdump can
 ; do type lookup, we can verify that the name matches what we expect.
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.pdb %p/Inputs/longname-truncation.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.pdb %p/Inputs/longname-truncation.yaml
diff --git a/test/DebugInfo/PDB/pdb-minimal-construct.test b/test/DebugInfo/PDB/pdb-minimal-construct.test
index d75c51056c9f..326c6cf9231b 100644
--- a/test/DebugInfo/PDB/pdb-minimal-construct.test
+++ b/test/DebugInfo/PDB/pdb-minimal-construct.test
@@ -1,11 +1,11 @@
-; This testcase verifies that we can produce a minimal PDB, while
-; serving as an example for how to construct a minimal PDB for other
-; testcases.  It takes as input a small fragment of hand-written yaml
-; that specifies nothing about the PDB other than a definition of one
-; symbol that it contains.  Then it produces a PDB, and uses the
-; resulting PDB to go back to yaml, and verify that the resulting yaml
-; is identical.
-
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.pdb %p/Inputs/one-symbol.yaml
-; RUN: llvm-pdbdump pdb2yaml -minimal -dbi-module-syms -no-file-headers %t.pdb > %t.pdb.yaml
-; RUN: diff -b %p/Inputs/one-symbol.yaml %t.pdb.yaml
+; This testcase verifies that we can produce a minimal PDB, while
+; serving as an example for how to construct a minimal PDB for other
+; testcases.  It takes as input a small fragment of hand-written yaml
+; that specifies nothing about the PDB other than a definition of one
+; symbol that it contains.  Then it produces a PDB, and uses the
+; resulting PDB to go back to yaml, and verify that the resulting yaml
+; is identical.
+
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.pdb %p/Inputs/one-symbol.yaml
+; RUN: llvm-pdbutil pdb2yaml -minimal -module-syms -no-file-headers %t.pdb > %t.pdb.yaml
+; RUN: diff -b %p/Inputs/one-symbol.yaml %t.pdb.yaml
diff --git a/test/DebugInfo/PDB/pdb-yaml-symbols.test b/test/DebugInfo/PDB/pdb-yaml-symbols.test
index e3cdcb6ababb..574065176b5b 100644
--- a/test/DebugInfo/PDB/pdb-yaml-symbols.test
+++ b/test/DebugInfo/PDB/pdb-yaml-symbols.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pdb2yaml -dbi-module-syms %p/Inputs/empty.pdb \
+; RUN: llvm-pdbutil pdb2yaml -module-syms %p/Inputs/empty.pdb \
 ; RUN:   | FileCheck -check-prefix=YAML %s
 
 
diff --git a/test/DebugInfo/PDB/pdb-yaml-types.test b/test/DebugInfo/PDB/pdb-yaml-types.test
index b3108591271e..f65d9edaa549 100644
--- a/test/DebugInfo/PDB/pdb-yaml-types.test
+++ b/test/DebugInfo/PDB/pdb-yaml-types.test
@@ -1,7 +1,7 @@
-RUN: llvm-pdbdump pdb2yaml -tpi-stream %p/Inputs/big-read.pdb > %t.yaml
+RUN: llvm-pdbutil pdb2yaml -tpi-stream %p/Inputs/big-read.pdb > %t.yaml
 RUN: FileCheck -check-prefix=YAML %s < %t.yaml
-RUN: llvm-pdbdump yaml2pdb %t.yaml -pdb %t.pdb
-RUN: llvm-pdbdump raw -tpi-records %t.pdb | FileCheck %s --check-prefix=PDB
+RUN: llvm-pdbutil yaml2pdb %t.yaml -pdb %t.pdb
+RUN: llvm-pdbutil raw -tpi-records %t.pdb | FileCheck %s --check-prefix=PDB
 
 Only verify the beginning of the type stream.
 
diff --git a/test/DebugInfo/PDB/pdbdump-debug-subsections.test b/test/DebugInfo/PDB/pdbdump-debug-subsections.test
new file mode 100644
index 000000000000..52f7bb52da2a
--- /dev/null
+++ b/test/DebugInfo/PDB/pdbdump-debug-subsections.test
@@ -0,0 +1,210 @@
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.pdb %p/Inputs/debug-subsections.yaml
+; RUN: llvm-pdbutil pdb2yaml -all -no-file-headers %t.pdb | FileCheck --check-prefix=YAML %s
+; RUN: llvm-pdbutil raw -subsections=all %t.pdb | FileCheck --check-prefix=RAW %s
+
+YAML:      Modules:
+YAML-NEXT:   - Module:          Foo.obj
+YAML-NEXT:     ObjFile:         Foo.obj
+YAML-NEXT:     Subsections:
+YAML-NEXT:       - !CrossModuleExports
+YAML-NEXT:         Exports:
+YAML-NEXT:           - LocalId:         4852
+YAML-NEXT:             GlobalId:        9283
+YAML-NEXT:           - LocalId:         2147487875
+YAML-NEXT:             GlobalId:        9123
+YAML:        - Module:          Bar.obj
+YAML-NEXT:     ObjFile:         Bar.obj
+YAML-NEXT:     Subsections:
+YAML-NEXT:       - !CrossModuleExports
+YAML-NEXT:         Exports:
+YAML-NEXT:           - LocalId:         4265
+YAML-NEXT:             GlobalId:        6097
+YAML-NEXT:           - LocalId:         4297
+YAML-NEXT:             GlobalId:        4677
+YAML-NEXT:       - !CrossModuleImports
+YAML-NEXT:         Imports:
+YAML-NEXT:           - Module:          Foo.obj
+YAML-NEXT:             Imports:         [ 4852, 2147487875 ]
+YAML:       - Module:          'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+YAML-NEXT:    ObjFile:         'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
+YAML-NEXT:    SourceFiles:
+YAML-NEXT:      - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML-NEXT:    Subsections:
+YAML-NEXT:      - !FileChecksums
+YAML-NEXT:        Checksums:
+YAML-NEXT:          - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML-NEXT:            Kind:            MD5
+YAML-NEXT:            Checksum:        A0A5BD0D3ECD93FC29D19DE826FBF4BC
+YAML-NEXT:          - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+YAML-NEXT:            Kind:            MD5
+YAML-NEXT:            Checksum:        1154D69F5B2650196E1FC34F4134E56B
+YAML-NEXT:      - !Lines
+YAML-NEXT:        CodeSize:        10
+YAML-NEXT:        Flags:           [  ]
+YAML-NEXT:        RelocOffset:     100016
+YAML-NEXT:        RelocSegment:    1
+YAML-NEXT:        Blocks:
+YAML-NEXT:          - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML-NEXT:            Lines:
+YAML-NEXT:              - Offset:          0
+YAML-NEXT:                LineStart:       5
+YAML-NEXT:                IsStatement:     true
+YAML-NEXT:                EndDelta:        0
+YAML-NEXT:              - Offset:          3
+YAML-NEXT:                LineStart:       6
+YAML-NEXT:                IsStatement:     true
+YAML-NEXT:                EndDelta:        0
+YAML-NEXT:              - Offset:          8
+YAML-NEXT:                LineStart:       7
+YAML-NEXT:                IsStatement:     true
+YAML-NEXT:                EndDelta:        0
+YAML-NEXT:            Columns:
+YAML-NEXT:      - !InlineeLines
+YAML-NEXT:        HasExtraFiles:   false
+YAML-NEXT:        Sites:           
+YAML-NEXT:          - FileName:        'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+YAML-NEXT:            LineNum:         26950
+YAML-NEXT:            Inlinee:         22767
+
+
+RAW:      DBI Stream {
+RAW:        Modules [
+RAW-NEXT:     {
+RAW-NEXT:       Name: Foo.obj
+RAW:            Subsections [
+RAW-NEXT:         CrossModuleExports [
+RAW-NEXT:           Export {
+RAW-NEXT:             Local: 0x12F4
+RAW-NEXT:             Global: 0x2443
+RAW-NEXT:           }
+RAW-NEXT:           Export {
+RAW-NEXT:             Local: 0x80001083
+RAW-NEXT:             Global: 0x23A3
+RAW-NEXT:           }
+RAW-NEXT:         ]
+RAW-NEXT:       ]
+RAW-NEXT:     }
+RAW-NEXT:     {
+RAW-NEXT:       Name: Bar.obj
+RAW:            Subsections [
+RAW-NEXT:         CrossModuleExports [
+RAW-NEXT:           Export {
+RAW-NEXT:             Local: 0x10A9
+RAW-NEXT:             Global: 0x17D1
+RAW-NEXT:           }
+RAW-NEXT:           Export {
+RAW-NEXT:             Local: 0x10C9
+RAW-NEXT:             Global: 0x1245
+RAW-NEXT:           }
+RAW-NEXT:         ]
+RAW-NEXT:         CrossModuleImports [
+RAW-NEXT:           ModuleImport {
+RAW-NEXT:             Module: Foo.obj
+RAW-NEXT:             Imports: [0x12F4, 0x80001083]
+RAW-NEXT:           }
+RAW-NEXT:         ]
+RAW-NEXT:       ]
+RAW-NEXT:     }
+RAW-NEXT:     {
+RAW-NEXT:       Name: d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj
+RAW:            Subsections [
+RAW-NEXT:         FileChecksums {
+RAW-NEXT:           Checksum {
+RAW-NEXT:             FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+RAW-NEXT:             Kind: MD5 (0x1)
+RAW-NEXT:             Checksum (
+RAW-NEXT:               0000: A0A5BD0D 3ECD93FC 29D19DE8 26FBF4BC  |....>...)...&...|
+RAW-NEXT:             )
+RAW-NEXT:           }
+RAW-NEXT:           Checksum {
+RAW-NEXT:             FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
+RAW-NEXT:             Kind: MD5 (0x1)
+RAW-NEXT:             Checksum (
+RAW-NEXT:               0000: 1154D69F 5B265019 6E1FC34F 4134E56B  |.T..[&P.n..OA4.k|
+RAW-NEXT:             )
+RAW-NEXT:           }
+RAW-NEXT:         }
+RAW-NEXT:         Lines {
+RAW-NEXT:           RelocSegment: 1
+RAW-NEXT:           RelocOffset: 100016
+RAW-NEXT:           CodeSize: 10
+RAW-NEXT:           HasColumns: No
+RAW-NEXT:           FileEntry {
+RAW-NEXT:             FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+RAW-NEXT:             Line {
+RAW-NEXT:               Offset: 0
+RAW-NEXT:               LineNumberStart: 5
+RAW-NEXT:               EndDelta: 0
+RAW-NEXT:               IsStatement: Yes
+RAW-NEXT:             }
+RAW-NEXT:             Line {
+RAW-NEXT:               Offset: 3
+RAW-NEXT:               LineNumberStart: 6
+RAW-NEXT:               EndDelta: 0
+RAW-NEXT:               IsStatement: Yes
+RAW-NEXT:             }
+RAW-NEXT:             Line {
+RAW-NEXT:               Offset: 8
+RAW-NEXT:               LineNumberStart: 7
+RAW-NEXT:               EndDelta: 0
+RAW-NEXT:               IsStatement: Yes
+RAW-NEXT:             }
+RAW-NEXT:           }
+RAW-NEXT:         }
+RAW-NEXT:         InlineeLines {
+RAW-NEXT:           HasExtraFiles: No
+RAW-NEXT:           Lines [
+RAW-NEXT:             Inlinee {
+RAW-NEXT:               FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
+RAW-NEXT:               Function {
+RAW-NEXT:                 Index: 0x58ef (unknown function)
+RAW-NEXT:               }
+RAW-NEXT:               SourceLine: 26950
+RAW-NEXT:             }
+RAW-NEXT:           ]
+RAW-NEXT:         }
+RAW-NEXT:       ]
+RAW-NEXT:     }
+RAW-NEXT:     {
+RAW-NEXT:       Name: ObjFileSubsections
+RAW-NEXT:       Debug Stream Index: 11
+RAW-NEXT:       Object File Name: ObjFileSubsections
+RAW-NEXT:       Num Files: 0
+RAW-NEXT:       Source File Name Idx: 0
+RAW-NEXT:       Pdb File Name Idx: 0
+RAW-NEXT:       Line Info Byte Size: 0
+RAW-NEXT:       C13 Line Info Byte Size: 116
+RAW-NEXT:       Symbol Byte Size: 4
+RAW-NEXT:       Type Server Index: 0
+RAW-NEXT:       Has EC Info: No
+RAW-NEXT:       Subsections [
+RAW-NEXT:         String Table [
+RAW-NEXT:           String1
+RAW-NEXT:           String2
+RAW-NEXT:           String3
+RAW-NEXT:         ]
+RAW-NEXT:         Symbols [
+RAW-NEXT:           {
+RAW-NEXT:             ObjectName {
+RAW-NEXT:               Signature: 0x0
+RAW-NEXT:               ObjectName: ObjFileSubsections
+RAW-NEXT:             }
+RAW-NEXT:           }
+RAW-NEXT:         ]
+RAW-NEXT:         FrameData [
+RAW-NEXT:           Frame {
+RAW-NEXT:             Rva: 6
+RAW-NEXT:             CodeSize: 1
+RAW-NEXT:             LocalSize: 2
+RAW-NEXT:             ParamsSize: 4
+RAW-NEXT:             MaxStackSize: 3
+RAW-NEXT:             FrameFunc: MyFunc
+RAW-NEXT:             PrologSize: 5
+RAW-NEXT:             SavedRegsSize: 7
+RAW-NEXT:             Flags: 0
+RAW-NEXT:           }
+RAW-NEXT:         ]
+RAW-NEXT:       ]
+RAW-NEXT:     }
+RAW-NEXT:   ]
+RAW-NEXT: }
diff --git a/test/DebugInfo/PDB/pdbdump-headers.test b/test/DebugInfo/PDB/pdbdump-headers.test
index 4e6bb75f8b8d..82fe91dd20aa 100644
--- a/test/DebugInfo/PDB/pdbdump-headers.test
+++ b/test/DebugInfo/PDB/pdbdump-headers.test
@@ -1,12 +1,12 @@
-; RUN: llvm-pdbdump raw -headers -string-table -tpi-records -tpi-record-bytes -module-syms \
+; RUN: llvm-pdbutil raw -headers -string-table -tpi-records -tpi-record-bytes -module-syms \
 ; RUN:              -sym-record-bytes -globals -publics -module-files \
 ; RUN:              -stream-summary -stream-blocks -ipi-records -ipi-record-bytes \
-; RUN:              -section-contribs -section-map -section-headers -line-info \
+; RUN:              -section-contribs -section-map -section-headers -subsections=all \
 ; RUN:              -tpi-hash -fpo -page-stats %p/Inputs/empty.pdb | FileCheck -check-prefix=EMPTY %s
-; RUN: llvm-pdbdump raw -all %p/Inputs/empty.pdb | FileCheck -check-prefix=ALL %s
-; RUN: llvm-pdbdump raw -headers -modules -module-files \
+; RUN: llvm-pdbutil raw -all %p/Inputs/empty.pdb | FileCheck -check-prefix=ALL %s
+; RUN: llvm-pdbutil raw -headers -modules -module-files \
 ; RUN:              %p/Inputs/big-read.pdb | FileCheck -check-prefix=BIG %s
-; RUN: not llvm-pdbdump raw -headers %p/Inputs/bad-block-size.pdb 2>&1 | FileCheck -check-prefix=BAD-BLOCK-SIZE %s
+; RUN: not llvm-pdbutil raw -headers %p/Inputs/bad-block-size.pdb 2>&1 | FileCheck -check-prefix=BAD-BLOCK-SIZE %s
 
 ; EMPTY:      FileHeaders {
 ; EMPTY-NEXT:   BlockSize: 4096
@@ -484,7 +484,34 @@
 ; EMPTY-NEXT:           )
 ; EMPTY-NEXT:         }
 ; EMPTY-NEXT:       ]
-; EMPTY-NEXT:       LineInfo [
+; EMPTY-NEXT:       Subsections [
+; EMPTY-NEXT:         Lines {
+; EMPTY-NEXT:           RelocSegment: 1
+; EMPTY-NEXT:           RelocOffset: 16
+; EMPTY-NEXT:           CodeSize: 10
+; EMPTY-NEXT:           HasColumns: No
+; EMPTY-NEXT:           FileEntry {
+; EMPTY-NEXT:             FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
+; EMPTY-NEXT:             Line {
+; EMPTY-NEXT:               Offset: 0
+; EMPTY-NEXT:               LineNumberStart: 5
+; EMPTY-NEXT:               EndDelta: 0
+; EMPTY-NEXT:               IsStatement: Yes
+; EMPTY-NEXT:             }
+; EMPTY-NEXT:             Line {
+; EMPTY-NEXT:               Offset: 3
+; EMPTY-NEXT:               LineNumberStart: 6
+; EMPTY-NEXT:               EndDelta: 0
+; EMPTY-NEXT:               IsStatement: Yes
+; EMPTY-NEXT:             }
+; EMPTY-NEXT:             Line {
+; EMPTY-NEXT:               Offset: 8
+; EMPTY-NEXT:               LineNumberStart: 7
+; EMPTY-NEXT:               EndDelta: 0
+; EMPTY-NEXT:               IsStatement: Yes
+; EMPTY-NEXT:             }
+; EMPTY-NEXT:           }
+; EMPTY-NEXT:         }
 ; EMPTY-NEXT:         FileChecksums {
 ; EMPTY-NEXT:           Checksum {
 ; EMPTY-NEXT:             FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
@@ -494,35 +521,6 @@
 ; EMPTY-NEXT:             )
 ; EMPTY-NEXT:           }
 ; EMPTY-NEXT:         }
-; EMPTY-NEXT:         Lines {
-; EMPTY-NEXT:           Block {
-; EMPTY-NEXT:             RelocSegment: 1
-; EMPTY-NEXT:             RelocOffset: 16
-; EMPTY-NEXT:             CodeSize: 10
-; EMPTY-NEXT:             HasColumns: No
-; EMPTY-NEXT:             Lines {
-; EMPTY-NEXT:               FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
-; EMPTY-NEXT:               Line {
-; EMPTY-NEXT:                 Offset: 0
-; EMPTY-NEXT:                 LineNumberStart: 5
-; EMPTY-NEXT:                 EndDelta: 0
-; EMPTY-NEXT:                 IsStatement: Yes
-; EMPTY-NEXT:               }
-; EMPTY-NEXT:               Line {
-; EMPTY-NEXT:                 Offset: 3
-; EMPTY-NEXT:                 LineNumberStart: 6
-; EMPTY-NEXT:                 EndDelta: 0
-; EMPTY-NEXT:                 IsStatement: Yes
-; EMPTY-NEXT:               }
-; EMPTY-NEXT:               Line {
-; EMPTY-NEXT:                 Offset: 8
-; EMPTY-NEXT:                 LineNumberStart: 7
-; EMPTY-NEXT:                 EndDelta: 0
-; EMPTY-NEXT:                 IsStatement: Yes
-; EMPTY-NEXT:               }
-; EMPTY-NEXT:             }
-; EMPTY-NEXT:           }
-; EMPTY-NEXT:         }
 ; EMPTY-NEXT:       ]
 ; EMPTY-NEXT:     }
 ; EMPTY-NEXT:     {
@@ -757,7 +755,7 @@
 ; EMPTY-NEXT:           )
 ; EMPTY-NEXT:         }
 ; EMPTY-NEXT:       ]
-; EMPTY-NEXT:       LineInfo [
+; EMPTY-NEXT:       Subsections [
 ; EMPTY-NEXT:       ]
 ; EMPTY-NEXT:     }
 ; EMPTY-NEXT:   ]
diff --git a/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test b/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
index ac32ce040b98..2639490f542a 100644
--- a/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
+++ b/test/DebugInfo/PDB/pdbdump-merge-ids-and-types.test
@@ -1,12 +1,12 @@
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-and-types-1.yaml
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-and-types-2.yaml
-; RUN: llvm-pdbdump merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-TYPES %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=INTMAIN %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=VOIDMAIN %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-TYPES %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-NAMES %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-UDT %s
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-and-types-1.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-and-types-2.yaml
+; RUN: llvm-pdbutil merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-TYPES %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=INTMAIN %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=VOIDMAIN %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-TYPES %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-NAMES %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=IPI-UDT %s
 
 TPI-TYPES:     Type Info Stream (TPI)
 TPI-TYPES:     Record count: 9
diff --git a/test/DebugInfo/PDB/pdbdump-mergeids.test b/test/DebugInfo/PDB/pdbdump-mergeids.test
index 6a4d19eba042..1c0a8704af2a 100644
--- a/test/DebugInfo/PDB/pdbdump-mergeids.test
+++ b/test/DebugInfo/PDB/pdbdump-mergeids.test
@@ -1,9 +1,9 @@
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-1.yaml
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-2.yaml
-; RUN: llvm-pdbdump merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
-; RUN: llvm-pdbdump raw -ipi-records %t.3.pdb | FileCheck -check-prefix=SUBSTRS %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-EMPTY %s
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-ids-1.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-ids-2.yaml
+; RUN: llvm-pdbutil merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
+; RUN: llvm-pdbutil raw -ipi-records %t.3.pdb | FileCheck -check-prefix=SUBSTRS %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=TPI-EMPTY %s
 
 
 MERGED: Type Info Stream (IPI)
diff --git a/test/DebugInfo/PDB/pdbdump-mergetypes.test b/test/DebugInfo/PDB/pdbdump-mergetypes.test
index a26b92631828..8d32b4d176f2 100644
--- a/test/DebugInfo/PDB/pdbdump-mergetypes.test
+++ b/test/DebugInfo/PDB/pdbdump-mergetypes.test
@@ -1,8 +1,8 @@
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-types-1.yaml
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-types-2.yaml
-; RUN: llvm-pdbdump merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
-; RUN: llvm-pdbdump raw -tpi-records %t.3.pdb | FileCheck -check-prefix=ARGLIST %s
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.1.pdb %p/Inputs/merge-types-1.yaml
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2.pdb %p/Inputs/merge-types-2.yaml
+; RUN: llvm-pdbutil merge -pdb=%t.3.pdb %t.1.pdb %t.2.pdb
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=MERGED %s
+; RUN: llvm-pdbutil raw -tpi-records %t.3.pdb | FileCheck -check-prefix=ARGLIST %s
 
 
 MERGED: Type Info Stream (TPI)
diff --git a/test/DebugInfo/PDB/pdbdump-raw-blocks.test b/test/DebugInfo/PDB/pdbdump-raw-blocks.test
index b43df970e5d5..14e1f86fc029 100644
--- a/test/DebugInfo/PDB/pdbdump-raw-blocks.test
+++ b/test/DebugInfo/PDB/pdbdump-raw-blocks.test
@@ -1,8 +1,8 @@
-; RUN: llvm-pdbdump raw -block-data=0 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK0 %s
-; RUN: llvm-pdbdump raw -block-data=0-1 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK01 %s
-; RUN: not llvm-pdbdump raw -block-data=0,1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
-; RUN: not llvm-pdbdump raw -block-data=0a1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
-; RUN: not llvm-pdbdump raw -block-data=0- %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
+; RUN: llvm-pdbutil raw -block-data=0 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK0 %s
+; RUN: llvm-pdbutil raw -block-data=0-1 %p/Inputs/empty.pdb | FileCheck --check-prefix=BLOCK01 %s
+; RUN: not llvm-pdbutil raw -block-data=0,1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
+; RUN: not llvm-pdbutil raw -block-data=0a1 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
+; RUN: not llvm-pdbutil raw -block-data=0- %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=BADSYNTAX %s
 
 BLOCK0:      Block Data {
 BLOCK0-NEXT:   Block 0 (
diff --git a/test/DebugInfo/PDB/pdbdump-raw-stream.test b/test/DebugInfo/PDB/pdbdump-raw-stream.test
index 6b6624f16015..846960a0964a 100644
--- a/test/DebugInfo/PDB/pdbdump-raw-stream.test
+++ b/test/DebugInfo/PDB/pdbdump-raw-stream.test
@@ -1,5 +1,5 @@
-; RUN: llvm-pdbdump raw -stream-data=1 %p/Inputs/empty.pdb | FileCheck --check-prefix=STREAM1 %s
-; RUN: not llvm-pdbdump raw -stream-data=100 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=INVALIDSTREAM %s
+; RUN: llvm-pdbutil raw -stream-data=1 %p/Inputs/empty.pdb | FileCheck --check-prefix=STREAM1 %s
+; RUN: not llvm-pdbutil raw -stream-data=100 %p/Inputs/empty.pdb 2>&1 | FileCheck --check-prefix=INVALIDSTREAM %s
 
 STREAM1:      Stream Data {
 STREAM1-NEXT:   Stream {
diff --git a/test/DebugInfo/PDB/pdbdump-readwrite.test b/test/DebugInfo/PDB/pdbdump-readwrite.test
index 4756faf68c2d..ee53f3b4cd2a 100644
--- a/test/DebugInfo/PDB/pdbdump-readwrite.test
+++ b/test/DebugInfo/PDB/pdbdump-readwrite.test
@@ -1,10 +1,10 @@
-RUN: llvm-pdbdump pdb2yaml -dbi-module-info -dbi-module-source-info \
-RUN:   -dbi-stream -pdb-stream -string-table -tpi-stream -stream-directory \
+RUN: llvm-pdbutil pdb2yaml -modules -module-files -dbi-stream \
+RUN:   -pdb-stream -string-table -tpi-stream -stream-directory \
 RUN:   -stream-metadata %p/Inputs/empty.pdb > %t.1
-RUN: llvm-pdbdump yaml2pdb -pdb=%t.2 %t.1
+RUN: llvm-pdbutil yaml2pdb -pdb=%t.2 %t.1
 
-RUN: llvm-pdbdump raw -headers -string-table -tpi-records %p/Inputs/empty.pdb | FileCheck %s
-RUN: llvm-pdbdump raw -headers -string-table -tpi-records %t.2 | FileCheck %s
+RUN: llvm-pdbutil raw -headers -string-table -tpi-records %p/Inputs/empty.pdb | FileCheck %s
+RUN: llvm-pdbutil raw -headers -string-table -tpi-records %t.2 | FileCheck %s
 
 CHECK:      FileHeaders {
 CHECK-NEXT:   BlockSize: 4096
diff --git a/test/DebugInfo/PDB/pdbdump-source-names.test b/test/DebugInfo/PDB/pdbdump-source-names.test
index 181f4d5e0ee4..f16a2699a154 100644
--- a/test/DebugInfo/PDB/pdbdump-source-names.test
+++ b/test/DebugInfo/PDB/pdbdump-source-names.test
@@ -6,11 +6,11 @@
 # that differ by one byte, so that at least one of those will only
 # pass if alignment is implemented correctly.
 
-RUN: llvm-pdbdump yaml2pdb -pdb=%T/source-names-1.pdb %p/Inputs/source-names-1.yaml
-RUN: llvm-pdbdump pdb2yaml -dbi-module-source-info %T/source-names-1.pdb \
+RUN: llvm-pdbutil yaml2pdb -pdb=%T/source-names-1.pdb %p/Inputs/source-names-1.yaml
+RUN: llvm-pdbutil pdb2yaml -module-files %T/source-names-1.pdb \
 RUN:     | FileCheck -check-prefix=CHECK1 %s
-RUN: llvm-pdbdump yaml2pdb -pdb=%T/source-names-2.pdb %p/Inputs/source-names-2.yaml
-RUN: llvm-pdbdump pdb2yaml -dbi-module-source-info %T/source-names-2.pdb \
+RUN: llvm-pdbutil yaml2pdb -pdb=%T/source-names-2.pdb %p/Inputs/source-names-2.yaml
+RUN: llvm-pdbutil pdb2yaml -module-files %T/source-names-2.pdb \
 RUN:     | FileCheck -check-prefix=CHECK2 %s
 
 CHECK1: SourceFiles:
diff --git a/test/DebugInfo/PDB/pdbdump-write.test b/test/DebugInfo/PDB/pdbdump-write.test
index 393473a53af1..b6d1959abe6a 100644
--- a/test/DebugInfo/PDB/pdbdump-write.test
+++ b/test/DebugInfo/PDB/pdbdump-write.test
@@ -10,11 +10,11 @@
 ; stream metadata, since the layout of the MSF file might be different
 ; (for example if we don't write the entire stream)
 ;
-; RUN: llvm-pdbdump pdb2yaml -stream-metadata -stream-directory \
-; RUN:   -pdb-stream -tpi-stream -dbi-module-syms %p/Inputs/empty.pdb > %t.1
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2 %t.1
-; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \
-; RUN:   -dbi-module-syms -no-file-headers %p/Inputs/empty.pdb > %t.3
-; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \
-; RUN:   -dbi-module-syms -no-file-headers %t.2 > %t.4
+; RUN: llvm-pdbutil pdb2yaml -stream-metadata -stream-directory \
+; RUN:   -pdb-stream -tpi-stream -module-syms %p/Inputs/empty.pdb > %t.1
+; RUN: llvm-pdbutil yaml2pdb -pdb=%t.2 %t.1
+; RUN: llvm-pdbutil pdb2yaml -pdb-stream -tpi-stream \
+; RUN:   -module-syms -no-file-headers %p/Inputs/empty.pdb > %t.3
+; RUN: llvm-pdbutil pdb2yaml -pdb-stream -tpi-stream \
+; RUN:   -module-syms -no-file-headers %t.2 > %t.4
 ; RUN: diff %t.3 %t.4
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test
deleted file mode 100644
index 1d63c85352aa..000000000000
--- a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo-write.test
+++ /dev/null
@@ -1,71 +0,0 @@
-; This testcase verifies that we can produce a PDB with line
-; information.  It does this by describing some line information
-; manually in YAML, creating a PDB out of it, then dumping then
-; line information from the resulting PDB.
-
-; RUN: llvm-pdbdump yaml2pdb -pdb=%t.pdb %p/Inputs/simple-line-info.yaml
-; RUN: llvm-pdbdump raw -line-info %t.pdb | FileCheck -check-prefix=LINES %s
-
-LINES:       Modules [
-LINES-NEXT:    {
-LINES-NEXT:      Name: d:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj
-LINES:           LineInfo [
-LINES-NEXT:        FileChecksums {
-LINES-NEXT:          Checksum {
-LINES-NEXT:            FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
-LINES-NEXT:            Kind: MD5 (0x1)
-LINES-NEXT:            Checksum (
-LINES-NEXT:              0000: A0A5BD0D 3ECD93FC 29D19DE8 26FBF4BC  |....>...)...&...|
-LINES-NEXT:            )
-LINES-NEXT:          }
-LINES-NEXT:          Checksum {
-LINES-NEXT:            FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
-LINES-NEXT:            Kind: MD5 (0x1)
-LINES-NEXT:            Checksum (
-LINES-NEXT:              0000: 1154D69F 5B265019 6E1FC34F 4134E56B  |.T..[&P.n..OA4.k|
-LINES-NEXT:            )
-LINES-NEXT:          }
-LINES-NEXT:        }
-LINES-NEXT:        Lines {
-LINES-NEXT:          Block {
-LINES-NEXT:            RelocSegment: 1
-LINES-NEXT:            RelocOffset: 16
-LINES-NEXT:            CodeSize: 10
-LINES-NEXT:            HasColumns: No
-LINES-NEXT:            Lines {
-LINES-NEXT:              FileName: d:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp
-LINES-NEXT:              Line {
-LINES-NEXT:                Offset: 0
-LINES-NEXT:                LineNumberStart: 5
-LINES-NEXT:                EndDelta: 0
-LINES-NEXT:                IsStatement: Yes
-LINES-NEXT:              }
-LINES-NEXT:              Line {
-LINES-NEXT:                Offset: 3
-LINES-NEXT:                LineNumberStart: 6
-LINES-NEXT:                EndDelta: 0
-LINES-NEXT:                IsStatement: Yes
-LINES-NEXT:              }
-LINES-NEXT:              Line {
-LINES-NEXT:                Offset: 8
-LINES-NEXT:                LineNumberStart: 7
-LINES-NEXT:                EndDelta: 0
-LINES-NEXT:                IsStatement: Yes
-LINES-NEXT:              }
-LINES-NEXT:            }
-LINES-NEXT:          }
-LINES-NEXT:        }
-LINES-NEXT:        InlineeLines {
-LINES-NEXT:          HasExtraFiles: No
-LINES-NEXT:          Lines [
-LINES-NEXT:            Inlinee {
-LINES-NEXT:              FileName: f:\dd\externalapis\windows\10\sdk\inc\winerror.h
-LINES-NEXT:              Function {
-LINES-NEXT:                Index: 0x58ef (unknown function)
-LINES-NEXT:              }
-LINES-NEXT:              SourceLine: 26950
-LINES-NEXT:            }
-LINES-NEXT:          ]
-LINES-NEXT:        }
-LINES-NEXT:      ]
-LINES-NEXT:    }
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
deleted file mode 100644
index f959805c7474..000000000000
--- a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llvm-pdbdump pdb2yaml -dbi-module-lines %p/Inputs/empty.pdb \
-; RUN:   | FileCheck -check-prefix=YAML %s
-
-
-YAML: ---
-YAML: MSF:
-YAML:   SuperBlock:
-YAML:     BlockSize:       4096
-YAML:     FreeBlockMap:    2
-YAML:     NumBlocks:       25
-YAML:     NumDirectoryBytes: 136
-YAML:     Unknown1:        0
-YAML:     BlockMapAddr:    24
-YAML:   NumDirectoryBlocks: 1
-YAML:   DirectoryBlocks: [ 23 ]
-YAML:   NumStreams:      0
-YAML:   FileSize:        102400
-YAML: DbiStream:
-YAML:   VerHeader:       V70
-YAML:   Age:             1
-YAML:   BuildNumber:     35840
-YAML:   PdbDllVersion:   31101
-YAML:   PdbDllRbld:      0
-YAML:   Flags:           1
-YAML:   MachineType:     x86
-YAML:   Modules:
-YAML:   - Module:          'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-YAML:     ObjFile:         'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
-YAML:     SourceFiles:
-YAML:       - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML:     Subsections:
-YAML:       - !Lines
-YAML:         CodeSize:        10
-YAML:         Flags:           [  ]
-YAML:         RelocOffset:     16
-YAML:         RelocSegment:    1
-YAML:         Blocks:
-YAML:           - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML:             Lines:
-YAML:               - Offset:          0
-YAML:                 LineStart:       5
-YAML:                 IsStatement:     true
-YAML:                 EndDelta:        0
-YAML:               - Offset:          3
-YAML:                 LineStart:       6
-YAML:                 IsStatement:     true
-YAML:                 EndDelta:        0
-YAML:               - Offset:          8
-YAML:                 LineStart:       7
-YAML:                 IsStatement:     true
-YAML:                 EndDelta:        0
-YAML:             Columns:
-YAML:       - !FileChecksums
-YAML:         Checksums:
-YAML:           - FileName:        'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML:             Kind:            MD5
-YAML:             Checksum:        A0A5BD0D3ECD93FC29D19DE826FBF4BC
-YAML:  - Module:          '* Linker *'
-YAML:    ObjFile:         ''
-YAML: ...
\ No newline at end of file
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-types.test b/test/DebugInfo/PDB/pdbdump-yaml-types.test
index 7e6fcc1ca420..a01edcee1e99 100644
--- a/test/DebugInfo/PDB/pdbdump-yaml-types.test
+++ b/test/DebugInfo/PDB/pdbdump-yaml-types.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pdb2yaml -tpi-stream %p/Inputs/empty.pdb \
+; RUN: llvm-pdbutil pdb2yaml -tpi-stream %p/Inputs/empty.pdb \
 ; RUN:   | FileCheck -check-prefix=YAML %s
 
 YAML: ---
diff --git a/test/DebugInfo/PDB/pdbdump-yaml.test b/test/DebugInfo/PDB/pdbdump-yaml.test
index 44025be5bca7..0563230cf47c 100644
--- a/test/DebugInfo/PDB/pdbdump-yaml.test
+++ b/test/DebugInfo/PDB/pdbdump-yaml.test
@@ -1,6 +1,6 @@
-; RUN: llvm-pdbdump pdb2yaml -stream-metadata -stream-directory -string-table -pdb-stream \
+; RUN: llvm-pdbutil pdb2yaml -stream-metadata -stream-directory -string-table -pdb-stream \
 ; RUN: %p/Inputs/empty.pdb | FileCheck -check-prefix=YAML %s
-; RUN: llvm-pdbdump pdb2yaml -no-file-headers -stream-metadata -stream-directory -pdb-stream \
+; RUN: llvm-pdbutil pdb2yaml -no-file-headers -stream-metadata -stream-directory -pdb-stream \
 ; RUN:   %p/Inputs/empty.pdb | FileCheck -check-prefix=NO-HEADERS %s
 
 ; YAML:      ---
diff --git a/test/DebugInfo/dwarfdump-str-offsets-invalid.test b/test/DebugInfo/dwarfdump-str-offsets-invalid.test
new file mode 100644
index 000000000000..45916d28de0b
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-str-offsets-invalid.test
@@ -0,0 +1,24 @@
+; Verify that llvm-dwarfdump handles invalid string offset tables.
+
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-1.x86_64.o | \
+RUN:   FileCheck --check-prefix=INVALIDCONTRIB %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-2.x86_64.o | \
+RUN:   FileCheck --check-prefix=INVALIDCONTRIB %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-3.x86_64.o | \
+RUN:   FileCheck --check-prefix=INVALIDCONTRIB %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-4.x86_64.o | \
+RUN:   FileCheck --check-prefix=INVALIDLENGTH %s
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets-invalid-5.x86_64.o | \
+RUN:   FileCheck --check-prefix=INVALIDSECTIONLENGTH %s
+
+INVALIDCONTRIB:            .debug_str_offsets contents:
+INVALIDCONTRIB-NOT:        contents:
+INVALIDCONTRIB:            error: invalid contribution to string offsets table in section .debug_str_offsets.
+
+INVALIDLENGTH:             .debug_str_offsets contents:
+INVALIDLENGTH-NOT:         contents:
+INVALIDLENGTH:             error: contribution to string offsets table in section .debug_str_offsets has invalid length.
+
+INVALIDSECTIONLENGTH:      .debug_str_offsets contents:
+INVALIDSECTIONLENGTH-NOT:  contents:
+INVALIDSECTIONLENGTH:      error: size of .debug_str_offsets is not a multiple of 4.
diff --git a/test/DebugInfo/dwarfdump-str-offsets.test b/test/DebugInfo/dwarfdump-str-offsets.test
new file mode 100644
index 000000000000..937c9c4d6ece
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-str-offsets.test
@@ -0,0 +1,76 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-str-offsets.x86_64.o | FileCheck %s
+
+; We are using a hand-constructed object file and are interested in the correct
+; display of the DW_AT_str_offsets_base attribute, the correct display of strings
+; and the dump of the .debug_str_offsets[.dwo] table.
+;
+; Abbreviation for DW_AT_str_offsets_base
+CHECK:      .debug_abbrev contents:
+CHECK-NOT:  contents:
+CHECK:      DW_TAG_compile_unit
+CHECK-NOT:  DW_TAG
+CHECK:      DW_AT_str_offsets_base DW_FORM_sec_offset
+
+; Verify that strings are displayed correctly as indexed strings
+CHECK:      .debug_info contents:
+CHECK-NOT:  contents:     
+CHECK:      DW_TAG_compile_unit
+CHECK-NEXT: DW_AT_producer [DW_FORM_strx] ( indexed (00000000) string = "Handmade DWARF producer")
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "Compile_Unit_1")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008)
+CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/CU1")
+
+; Second compile unit (b.cpp)
+CHECK:      DW_TAG_compile_unit
+CHECK-NEXT: DW_AT_producer [DW_FORM_strx] ( indexed (00000000) string = "Handmade DWARF producer")
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "Compile_Unit_2")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x0000001c)
+CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/CU2")
+
+; The split CU
+CHECK:      .debug_info.dwo contents:
+CHECK-NOT:  contents:
+CHECK:      DW_TAG_compile_unit
+CHECK-NEXT: DW_AT_producer [DW_FORM_strx] ( indexed (00000000) string = "Handmade split DWARF producer")
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "V5_split_compile_unit")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008)
+CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/splitCU")
+
+; The type unit
+CHECK:      .debug_types contents:
+CHECK:      DW_TAG_type_unit
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000000) string = "Type_Unit")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset]       (0x00000030)
+CHECK:      DW_TAG_structure_type
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "MyStruct")
+
+; The split type unit
+CHECK:      .debug_types.dwo contents:
+CHECK:      DW_TAG_type_unit
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000000) string = "V5_split_type_unit")
+CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset]       (0x0000001c)
+CHECK:      DW_TAG_structure_type
+CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "V5_split_Mystruct")
+
+; The .debug_str_offsets section
+CHECK:      .debug_str_offsets contents:
+CHECK-NEXT: 0x00000000: Contribution size = 12, Version = 5
+CHECK-NEXT: 0x00000008: 00000000 "Handmade DWARF producer"
+CHECK-NEXT: 0x0000000c: 00000018 "Compile_Unit_1"
+CHECK-NEXT: 0x00000010: 00000027 "/home/test/CU1"
+CHECK-NEXT: 0x00000014: Contribution size = 12, Version = 5
+CHECK-NEXT: 0x0000001c: 00000000 "Handmade DWARF producer"
+CHECK-NEXT: 0x00000020: 00000036 "Compile_Unit_2"
+CHECK-NEXT: 0x00000024: 00000045 "/home/test/CU2"
+CHECK-NEXT: 0x00000028: Contribution size = 8, Version = 5
+CHECK-NEXT: 0x00000030: 00000054 "Type_Unit"
+CHECK-NEXT: 0x00000034: 0000005e "MyStruct"
+
+CHECK:      .debug_str_offsets.dwo contents:
+CHECK-NEXT: 0x00000000: Contribution size = 12, Version = 5
+CHECK-NEXT: 0x00000008: 00000000 "Handmade split DWARF producer"
+CHECK-NEXT: 0x0000000c: 0000001e "V5_split_compile_unit"
+CHECK-NEXT: 0x00000010: 00000034 "/home/test/splitCU"
+CHECK-NEXT: 0x00000014: Contribution size = 8, Version = 5
+CHECK-NEXT: 0x0000001c: 00000047 "V5_split_type_unit"
+CHECK-NEXT: 0x00000020: 0000005a "V5_split_Mystruct"
diff --git a/test/FileCheck/check-dag.txt b/test/FileCheck/check-dag.txt
index 2b5a47551e83..7c5a1d18292e 100644
--- a/test/FileCheck/check-dag.txt
+++ b/test/FileCheck/check-dag.txt
@@ -12,6 +12,10 @@ add r11, r3, r4
 add r10, r1, r2
 mul r5, r10, r11
 
+# begin
+# end
+xor
+
 ; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2
 ; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4
 ; CHECK: mul r5, [[REG1]], [[REG2]]
@@ -24,3 +28,8 @@ mul r5, r10, r11
 ; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4
 ; CHECK-NOT: xor
 ; CHECK-DAG: mul r5, [[REG1]], [[REG2]]
+
+; CHECK-DAG: begin
+; CHECK-NOT: xor
+; CHECK-DAG: end
+; CHECK: xor
diff --git a/test/Instrumentation/MemorySanitizer/csr.ll b/test/Instrumentation/MemorySanitizer/csr.ll
index c288f93241b9..a7664d456368 100644
--- a/test/Instrumentation/MemorySanitizer/csr.ll
+++ b/test/Instrumentation/MemorySanitizer/csr.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
 ; RUN: opt < %s -msan -msan-check-access-address=1 -S | FileCheck %s --check-prefix=ADDR
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll b/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll
index be3f1976daa1..c4ec7fa29199 100644
--- a/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
 ; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_arith.ll b/test/Instrumentation/MemorySanitizer/vector_arith.ll
index 8be085cff33d..6652fdff89b0 100644
--- a/test/Instrumentation/MemorySanitizer/vector_arith.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_arith.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_cmp.ll b/test/Instrumentation/MemorySanitizer/vector_cmp.ll
index 62a5f573064e..910b1351330a 100644
--- a/test/Instrumentation/MemorySanitizer/vector_cmp.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_cmp.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_cvt.ll b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
index beedb0e63e50..1dd3d7d9c68d 100644
--- a/test/Instrumentation/MemorySanitizer/vector_cvt.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_pack.ll b/test/Instrumentation/MemorySanitizer/vector_pack.ll
index deb03d84802a..574e7b890034 100644
--- a/test/Instrumentation/MemorySanitizer/vector_pack.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_pack.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Instrumentation/MemorySanitizer/vector_shift.ll b/test/Instrumentation/MemorySanitizer/vector_shift.ll
index a4b8fdbd603f..c605c97bba17 100644
--- a/test/Instrumentation/MemorySanitizer/vector_shift.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_shift.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 ; Test instrumentation of vector shift instructions.
 
diff --git a/test/LTO/ARM/Inputs/thumb.ll b/test/LTO/ARM/Inputs/thumb.ll
new file mode 100644
index 000000000000..cb8c2dfa5585
--- /dev/null
+++ b/test/LTO/ARM/Inputs/thumb.ll
@@ -0,0 +1,15 @@
+target triple = "thumbv7-linux-gnueabihf"
+
+define i32 @foo(i32 %a, i32 %b) #0 {
+entry:
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+define i32 @bar(i32 %a, i32 %b) #0 {
+entry:
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+attributes #0 = { "target-features"="+thumb-mode" }
diff --git a/test/LTO/ARM/link-arm-and-thumb.ll b/test/LTO/ARM/link-arm-and-thumb.ll
new file mode 100644
index 000000000000..743e3f66194f
--- /dev/null
+++ b/test/LTO/ARM/link-arm-and-thumb.ll
@@ -0,0 +1,32 @@
+; Testcase to check that functions from a Thumb module can be inlined in an
+; ARM function.
+;
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as %p/Inputs/thumb.ll -o %t2.bc
+; RUN: llvm-lto -exported-symbol main \
+; RUN:          -exported-symbol bar \
+; RUN:          -filetype=asm \
+; RUN:          -o - \
+; RUN:          %t1.bc %t2.bc 2> %t3.out| FileCheck %s
+; RUN: FileCheck --allow-empty --input-file %t3.out --check-prefix STDERR %s
+
+target triple = "armv7-linux-gnueabihf"
+
+; CHECK: .code  32
+; CHECK-NEXT: main
+; CHECK-NEXT: .fnstart
+; CHECK-NEXT: mov r0, #30
+
+; CHECK: .code  16
+; CHECK-NEXT: .thumb_func
+; CHECK-NEXT: bar
+
+declare i32 @foo(i32 %a, i32 %b);
+
+define i32 @main() {
+entry:
+  %add = call i32 @foo(i32 10, i32 20)
+  ret i32 %add
+}
+
+; STDERR-NOT: warning: Linking two modules of different target triples:
diff --git a/test/LTO/Resolution/X86/linker-redef.ll b/test/LTO/Resolution/X86/linker-redef.ll
new file mode 100644
index 000000000000..802a54db93c6
--- /dev/null
+++ b/test/LTO/Resolution/X86/linker-redef.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-lto2 run -o %t1.o %t.o -r %t.o,bar,pr
+; RUN: llvm-readobj -t %t1.o.0 | FileCheck %s
+
+; CHECK: Name: bar
+; CHECK-NEXT: Value:
+; CHECK-NEXT: Size:
+; CHECK-NEXT: Binding: Weak
+; CHECK-NEXT: Type: Function
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @bar() {
+  ret void
+}
diff --git a/test/Linker/Inputs/thumb.ll b/test/Linker/Inputs/thumb.ll
new file mode 100644
index 000000000000..e15fb26a8c7e
--- /dev/null
+++ b/test/Linker/Inputs/thumb.ll
@@ -0,0 +1,16 @@
+target triple = "thumbv7-linux-gnueabihf"
+
+define i32 @foo(i32 %a, i32 %b) #0 {
+entry:
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+define i32 @bar(i32 %a, i32 %b) #1 {
+entry:
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+attributes #0 = { "target-features"="-thumb-mode" }
+attributes #1 = { "target-features"="+thumb-mode" }
diff --git a/test/Linker/link-arm-and-thumb.ll b/test/Linker/link-arm-and-thumb.ll
new file mode 100644
index 000000000000..a90f2128e443
--- /dev/null
+++ b/test/Linker/link-arm-and-thumb.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as %s -o %t1.bc
+; RUN: llvm-as %p/Inputs/thumb.ll -o %t2.bc
+; RUN: llvm-link %t1.bc %t2.bc -S 2> %t3.out | FileCheck %s
+; RUN: FileCheck --allow-empty --input-file %t3.out --check-prefix STDERR %s
+
+target triple = "armv7-linux-gnueabihf"
+
+declare i32 @foo(i32 %a, i32 %b);
+
+define i32 @main() {
+entry:
+  %add = call i32 @foo(i32 10, i32 20)
+  ret i32 %add
+}
+
+; CHECK: define i32 @main() {
+; CHECK: define i32 @foo(i32 %a, i32 %b) [[ARM_ATTRS:#[0-9]+]]
+; CHECK: define i32 @bar(i32 %a, i32 %b) [[THUMB_ATTRS:#[0-9]+]]
+
+; CHECK: attributes [[ARM_ATTRS]] = { "target-features"="-thumb-mode" }
+; CHECK: attributes [[THUMB_ATTRS]] = { "target-features"="+thumb-mode" }
+
+; STDERR-NOT: warning: Linking two modules of different target triples:
diff --git a/test/MC/AMDGPU/sopp-err.s b/test/MC/AMDGPU/sopp-err.s
index fac0d3222909..d65e54344031 100644
--- a/test/MC/AMDGPU/sopp-err.s
+++ b/test/MC/AMDGPU/sopp-err.s
@@ -75,16 +75,16 @@ s_sendmsg sendmsg(MSG_SYSMSG, 5)
 // GCN: error: invalid/unsupported code of SYSMSG_OP
 
 s_waitcnt lgkmcnt(16)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for lgkmcnt
 
 s_waitcnt expcnt(8)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for expcnt
 
 s_waitcnt vmcnt(16)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for vmcnt
 
 s_waitcnt vmcnt(0xFFFFFFFFFFFF0000)
-// GCN: error: failed parsing operand
+// GCN: error: too large value for vmcnt
 
 s_waitcnt vmcnt(0), expcnt(0), lgkmcnt(0),
 // GCN: error: failed parsing operand
diff --git a/test/MC/AMDGPU/sym_option.s b/test/MC/AMDGPU/sym_option.s
index 5cf97c7b435d..79e3ae5bcef9 100644
--- a/test/MC/AMDGPU/sym_option.s
+++ b/test/MC/AMDGPU/sym_option.s
@@ -10,7 +10,7 @@
 // RUN: llvm-mc -arch=amdgcn -mcpu=stoney  %s | FileCheck %s --check-prefix=STONEY
 
 .byte .option.machine_version_major
-// SI: .byte 0
+// SI: .byte 6
 // BONAIRE: .byte 7
 // HAWAII: .byte 7
 // KABINI: .byte 7
@@ -37,7 +37,7 @@
 // SI: .byte 0
 // BONAIRE: .byte 0
 // HAWAII: .byte 1
-// KABINI: .byte 2
+// KABINI: .byte 3
 // ICELAND: .byte 0
 // CARRIZO: .byte 1
 // TONGA: .byte 2
diff --git a/test/MC/ARM/arm-thumb-tail-call.ll b/test/MC/ARM/arm-thumb-tail-call.ll
new file mode 100644
index 000000000000..c166719505df
--- /dev/null
+++ b/test/MC/ARM/arm-thumb-tail-call.ll
@@ -0,0 +1,25 @@
+; RUN: llc -O0 < %s -mtriple armv7-linux-gnueabi -o - \
+; RUN:   | llvm-mc -triple armv7-linux-gnueabi -filetype=obj -o - \
+; RUN:    | llvm-readobj -r | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+define internal i32 @arm_fn() #1 {
+  %1 = tail call i32 @thumb_fn()
+  ret i32 %1
+}
+
+define internal i32 @thumb_fn() #2 {
+  %1 = tail call i32 @arm_fn()
+  ret i32 %1
+}
+
+attributes #1 = { "target-features"="-thumb-mode" }
+attributes #2 = { "target-features"="+thumb-mode" }
+
+; CHECK: Relocations [
+; CHECK-NEXT: Section (3) .rel.text {
+; CHECK-NEXT: 0x0 R_ARM_JUMP24 thumb_fn 0x0
+; CHECK-NEXT: 0x4 R_ARM_THM_JUMP24 arm_fn 0x0
+; CHECK-NEXT: }
diff --git a/test/MC/ARM/big-endian-thumb2-fixup.s b/test/MC/ARM/big-endian-thumb2-fixup.s
index 0aaa26a209fe..4435f6ed79e5 100644
--- a/test/MC/ARM/big-endian-thumb2-fixup.s
+++ b/test/MC/ARM/big-endian-thumb2-fixup.s
@@ -47,3 +47,9 @@ ldst_precel_12_label:
 	nop
 adr_pcrel_12_label:
 
+@ARM::fixup_t2_so_imm
+.section s_t2_so_imm,"ax",%progbits
+// CHECK-LABEL: Contents of section s_t2_so_imm
+// CHECK: 0000 f1033337
+	add r3, r3,val
+.equ val,0x37373737
diff --git a/test/MC/ARM/t2-modified-immediate-fixup-error1.s b/test/MC/ARM/t2-modified-immediate-fixup-error1.s
new file mode 100644
index 000000000000..f5113a649207
--- /dev/null
+++ b/test/MC/ARM/t2-modified-immediate-fixup-error1.s
@@ -0,0 +1,13 @@
+@ PR28647
+@ RUN: not llvm-mc -triple=thumbv7a-linux-gnueabi -filetype=obj < %s 2>&1 | FileCheck %s
+    .text
+    .syntax unified
+    .balign 2
+
+@ Error with unencodeable immediate
+    add r1, r2, sym0
+@ CHECK: error: out of range immediate fixup value
+    .equ sym0, 0x01abcdef
+.L2:
+    mov r0, .L2
+@ CHECK: error: unsupported relocation on symbol
diff --git a/test/MC/ARM/t2-modified-immediate-fixup-error2.s b/test/MC/ARM/t2-modified-immediate-fixup-error2.s
new file mode 100644
index 000000000000..a5672b5eb1fb
--- /dev/null
+++ b/test/MC/ARM/t2-modified-immediate-fixup-error2.s
@@ -0,0 +1,12 @@
+@ PR28647
+@ RUN: not llvm-mc -triple=thumbv7a-linux-gnueabi -filetype=obj < %s 2>&1 | FileCheck %s
+    .text
+    .syntax unified
+    .balign 2
+
+@ mov with :upper16: or :lower16: should not match mov with modified immediate
+    mov r0, :upper16: sym0
+@ CHECK: error: instruction requires: arm-mode
+    mov r0, :lower16: sym0
+@ CHECK: error: instruction requires: arm-mode
+    .equ sym0, 0x01abcdef
diff --git a/test/MC/ARM/t2-modified-immediate-fixup.s b/test/MC/ARM/t2-modified-immediate-fixup.s
new file mode 100644
index 000000000000..ad0fae2e666e
--- /dev/null
+++ b/test/MC/ARM/t2-modified-immediate-fixup.s
@@ -0,0 +1,45 @@
+@ PR28647
+@ RUN: llvm-mc < %s -triple=thumbv7a-linux-gnueabi -filetype=obj -o - \
+@ RUN: | llvm-objdump --disassemble -triple=thumbv7a-linux-gnueabi - | FileCheck %s
+    .text
+    .syntax unified
+    .balign 2
+@ Thumb2 modified immediate instructions
+    add r1,r1, sym0
+    sub r1,r2, sym1
+    cmp r2,    sym2
+    and r4,r4, sym3
+    orr r8,r9, sym4
+    teq r1,    sym5
+    tst r1,    sym6
+    sbc r1,r1, sym7
+    adc r1,r0, sym8
+@CHECK: add.w   r1, r1, #255
+@CHECK: sub.w   r1, r2, #16711935
+@CHECK: cmp.w   r2, #4278255360
+@CHECK: and     r4, r4, #303174162
+@CHECK: orr     r8, r9, #2852126720
+@CHECK: teq.w   r1, #1426063360
+@CHECK: tst.w   r1, #713031680
+@CHECK: sbc     r1, r1, #2785280
+@CHECK: adc     r1, r0, #340
+
+.L1:
+    sub r3, r3, #.L2 - .L1
+.L2:
+@CHECK: sub.w   r3, r3, #4
+
+@ mov without :upper16: or :lower16: should match mov with modified immediate
+     mov r1, sym3
+@CHECK: mov.w   r1, #303174162
+
+@ Modified immediate constants
+    .equ sym0, 0x000000ff
+    .equ sym1, 0x00ff00ff
+    .equ sym2, 0xff00ff00
+    .equ sym3, 0x12121212
+    .equ sym4, 0xaa000000
+    .equ sym5, 0x55000000
+    .equ sym6, 0x2a800000
+    .equ sym7, 0x002a8000
+    .equ sym8, 0x00000154
diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s
index 76b4cf12626b..ca917a0502dc 100644
--- a/test/MC/ARM/thumb2-diagnostics.s
+++ b/test/MC/ARM/thumb2-diagnostics.s
@@ -76,10 +76,8 @@
 @ CHECK-ERRORS: error: branch target out of range
 
 foo2:
-        mov r0, foo2
         movw r0, foo2
         movt r0, foo2
-@ CHECK-ERRORS: error: instruction requires: arm-mode
 @ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
 @ CHECK-ERRORS:                  ^
 @ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
diff --git a/test/MC/AsmParser/empty-comment.s b/test/MC/AsmParser/empty-comment.s
new file mode 100644
index 000000000000..57df820007ca
--- /dev/null
+++ b/test/MC/AsmParser/empty-comment.s
@@ -0,0 +1,4 @@
+	#RUN: llvm-mc -preserve-comments -n -triple i386-linux-gnu < %s > %t
+	.text
+foo:
+	nop #
\ No newline at end of file
diff --git a/test/MC/Disassembler/Mips/micromips-dsp/valid.txt b/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
index f3d6f3dc0367..a373bcd9d6a3 100644
--- a/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
+++ b/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
@@ -94,7 +94,7 @@
 0x00 0x01 0x70 0x7c # CHECK: mtlo $1, $ac1
 0x00 0x22 0xf1 0x3c # CHECK: raddu.w.qb $1, $2
 0x00 0x20 0x86 0x7c # CHECK: rddsp $1, 2
-0x02 0x00 0x08 0x3d # CHECK: repl.ph $1, 512
+0x00 0x02 0x08 0x3d # CHECK: repl.ph $1, 2
 0x00 0x30 0x05 0xfc # CHECK: repl.qb $1, 128
 0x00 0x22 0x03 0x3c # CHECK: replv.ph $1, $2
 0x00 0x22 0x13 0x3c # CHECK: replv.qb $1, $2
diff --git a/test/MC/ELF/ARM/clang-section.s b/test/MC/ELF/ARM/clang-section.s
new file mode 100644
index 000000000000..0b0d27c4ceb1
--- /dev/null
+++ b/test/MC/ELF/ARM/clang-section.s
@@ -0,0 +1,399 @@
+// RUN: llvm-mc -filetype=obj -triple arm-eabi %s -o - | llvm-readobj -s -t | FileCheck %s
+// Test that global variables and functions are assigned to the correct section.
+	.text
+	.syntax unified
+	.eabi_attribute	67, "2.09"	@ Tag_conformance
+	.eabi_attribute	6, 1	@ Tag_CPU_arch
+	.eabi_attribute	8, 1	@ Tag_ARM_ISA_use
+	.eabi_attribute	17, 1	@ Tag_ABI_PCS_GOT_use
+	.eabi_attribute	20, 1	@ Tag_ABI_FP_denormal
+	.eabi_attribute	21, 1	@ Tag_ABI_FP_exceptions
+	.eabi_attribute	23, 3	@ Tag_ABI_FP_number_model
+	.eabi_attribute	34, 1	@ Tag_CPU_unaligned_access
+	.eabi_attribute	24, 1	@ Tag_ABI_align_needed
+	.eabi_attribute	25, 1	@ Tag_ABI_align_preserved
+	.eabi_attribute	38, 1	@ Tag_ABI_FP_16bit_format
+	.eabi_attribute	18, 4	@ Tag_ABI_PCS_wchar_t
+	.eabi_attribute	26, 2	@ Tag_ABI_enum_size
+	.eabi_attribute	14, 0	@ Tag_ABI_PCS_R9_use
+	.section	my_text.1,"ax",%progbits
+	.globl	foo
+	.p2align	2
+	.type	foo,%function
+	.code	32                      @ @foo
+foo:
+	.fnstart
+@ BB#0:                                 @ %entry
+	ldr	r0, .LCPI0_0
+	ldr	r0, [r0]
+	mov	pc, lr
+	.p2align	2
+@ BB#1:
+.LCPI0_0:
+	.long	b
+.Lfunc_end0:
+	.size	foo, .Lfunc_end0-foo
+	.cantunwind
+	.fnend
+
+	.section	my_text.2,"ax",%progbits
+	.globl	goo
+	.p2align	2
+	.type	goo,%function
+	.code	32                      @ @goo
+goo:
+	.fnstart
+@ BB#0:                                 @ %entry
+	.save	{r11, lr}
+	push	{r11, lr}
+	ldr	r0, .LCPI1_0
+	ldr	r1, .LCPI1_1
+	bl	zoo
+	pop	{r11, lr}
+	mov	pc, lr
+	.p2align	2
+@ BB#1:
+.LCPI1_0:
+	.long	_ZL1g
+.LCPI1_1:
+	.long	_ZZ3gooE7lstat_h
+.Lfunc_end1:
+	.size	goo, .Lfunc_end1-goo
+	.cantunwind
+	.fnend
+
+	.text
+	.globl	hoo
+	.p2align	2
+	.type	hoo,%function
+	.code	32                      @ @hoo
+hoo:
+	.fnstart
+@ BB#0:                                 @ %entry
+	ldr	r0, .LCPI2_0
+	ldr	r0, [r0]
+	mov	pc, lr
+	.p2align	2
+@ BB#1:
+.LCPI2_0:
+	.long	b
+.Lfunc_end2:
+	.size	hoo, .Lfunc_end2-hoo
+	.cantunwind
+	.fnend
+
+	.type	a,%object               @ @a
+	.section	my_bss.1,"aw",%nobits
+	.globl	a
+	.p2align	2
+a:
+	.long	0                       @ 0x0
+	.size	a, 4
+
+	.type	b,%object               @ @b
+	.section	my_data.1,"aw",%progbits
+	.globl	b
+	.p2align	2
+b:
+	.long	1                       @ 0x1
+	.size	b, 4
+
+	.type	c,%object               @ @c
+	.section	my_bss.1,"aw",%nobits
+	.globl	c
+	.p2align	2
+c:
+	.zero	16
+	.size	c, 16
+
+	.type	d,%object               @ @d
+	.globl	d
+	.p2align	1
+d:
+	.zero	10
+	.size	d, 10
+
+	.type	e,%object               @ @e
+	.section	my_data.1,"aw",%progbits
+	.globl	e
+	.p2align	1
+e:
+	.short	0                       @ 0x0
+	.short	0                       @ 0x0
+	.short	1                       @ 0x1
+	.short	0                       @ 0x0
+	.short	0                       @ 0x0
+	.short	0                       @ 0x0
+	.size	e, 12
+
+	.type	f,%object               @ @f
+	.section	my_rodata.1,"a",%progbits
+	.globl	f
+	.p2align	2
+f:
+	.long	2                       @ 0x2
+	.size	f, 4
+
+	.type	h,%object               @ @h
+	.bss
+	.globl	h
+	.p2align	2
+h:
+	.long	0                       @ 0x0
+	.size	h, 4
+
+	.type	i,%object               @ @i
+	.section	my_bss.2,"aw",%nobits
+	.globl	i
+	.p2align	2
+i:
+	.long	0                       @ 0x0
+	.size	i, 4
+
+	.type	j,%object               @ @j
+	.section	my_rodata.1,"a",%progbits
+	.globl	j
+	.p2align	2
+j:
+	.long	4                       @ 0x4
+	.size	j, 4
+
+	.type	k,%object               @ @k
+	.section	my_bss.2,"aw",%nobits
+	.globl	k
+	.p2align	2
+k:
+	.long	0                       @ 0x0
+	.size	k, 4
+
+	.type	_ZZ3gooE7lstat_h,%object @ @_ZZ3gooE7lstat_h
+	.p2align	2
+_ZZ3gooE7lstat_h:
+	.long	0                       @ 0x0
+	.size	_ZZ3gooE7lstat_h, 4
+
+	.type	_ZL1g,%object           @ @_ZL1g
+	.section	my_bss.1,"aw",%nobits
+	.p2align	2
+_ZL1g:
+	.zero	8
+	.size	_ZL1g, 8
+
+	.type	l,%object               @ @l
+	.section	my_data.2,"aw",%progbits
+	.globl	l
+	.p2align	2
+l:
+	.long	5                       @ 0x5
+	.size	l, 4
+
+	.type	m,%object               @ @m
+	.section	my_rodata.2,"a",%progbits
+	.globl	m
+	.p2align	2
+m:
+	.long	6                       @ 0x6
+	.size	m, 4
+
+	.type	n,%object               @ @n
+	.bss
+	.globl	n
+	.p2align	2
+n:
+	.long	0                       @ 0x0
+	.size	n, 4
+
+	.type	o,%object               @ @o
+	.data
+	.globl	o
+	.p2align	2
+o:
+	.long	6                       @ 0x6
+	.size	o, 4
+
+	.type	p,%object               @ @p
+	.section	.rodata,"a",%progbits
+	.globl	p
+	.p2align	2
+p:
+	.long	7                       @ 0x7
+	.size	p, 4
+
+
+	.ident	"clang version 5.0.0 (http://llvm.org/git/clang.git 254242a3ad440307fb451093a429c71ea9a8c888) (http://llvm.org/git/llvm.git 3c8daefbe3d1672ac1dae775b211f881f0063038)"
+	.section	".note.GNU-stack","",%progbits
+	.eabi_attribute	30, 1	@ Tag_ABI_optimization_goals
+
+//CHECK:   Section {
+//CHECK:     Name: .text
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x6)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_EXECINSTR (0x4)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_text.1
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x6)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_EXECINSTR (0x4)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_text.2
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x6)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_EXECINSTR (0x4)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_bss.1
+//CHECK:     Type: SHT_NOBITS (0x8)
+//CHECK:     Flags [ (0x3)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_WRITE (0x1)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_data.1
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x3)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_WRITE (0x1)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_rodata.1
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x2)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: .bss
+//CHECK:     Type: SHT_NOBITS (0x8)
+//CHECK:     Flags [ (0x3)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_WRITE (0x1)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_bss.2
+//CHECK:     Type: SHT_NOBITS (0x8)
+//CHECK:     Flags [ (0x3)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_WRITE (0x1)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_data.2
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x3)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_WRITE (0x1)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: my_rodata.2
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x2)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: .data
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x3)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:       SHF_WRITE (0x1)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Section {
+//CHECK:     Name: .rodata
+//CHECK:     Type: SHT_PROGBITS (0x1)
+//CHECK:     Flags [ (0x2)
+//CHECK:       SHF_ALLOC (0x2)
+//CHECK:     ]
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: _ZL1g
+//CHECK:     Section: my_bss.1 (0xE)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: _ZZ3gooE7lstat_h
+//CHECK:     Section: my_bss.2 (0x12)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: a
+//CHECK:     Section: my_bss.1 (0xE)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: b
+//CHECK:     Section: my_data.1 (0xF)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: c
+//CHECK:     Section: my_bss.1 (0xE)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: d
+//CHECK:     Section: my_bss.1 (0xE)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: e
+//CHECK:     Section: my_data.1 (0xF)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: f
+//CHECK:     Section: my_rodata.1 (0x10)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: foo
+//CHECK:     Section: my_text.1 (0x4)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: goo
+//CHECK:     Section: my_text.2 (0x8)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: h
+//CHECK:     Section: .bss (0x11)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: hoo
+//CHECK:     Section: .text (0x2)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: i
+//CHECK:     Section: my_bss.2 (0x12)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: j
+//CHECK:     Section: my_rodata.1 (0x10)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: k
+//CHECK:     Section: my_bss.2 (0x12)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: l
+//CHECK:     Section: my_data.2 (0x13)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: m
+//CHECK:     Section: my_rodata.2 (0x14)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: n
+//CHECK:     Section: .bss (0x11)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: o
+//CHECK:     Section: .data (0x15)
+//CHECK:   }
+//CHECK:   Symbol {
+//CHECK:     Name: p
+//CHECK:     Section: .rodata (0x16)
+//CHECK:   }
diff --git a/test/MC/MachO/alias.s b/test/MC/MachO/alias.s
new file mode 100644
index 000000000000..aec04c63b68f
--- /dev/null
+++ b/test/MC/MachO/alias.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc -triple x86_64-apple-macosx10.12.0 %s -filetype=obj | llvm-readobj -r | FileCheck %s
+
+l_a:
+l_b = l_a
+l_c = l_b
+        .long l_c
+
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section __text {
+// CHECK-NEXT:     0x0 0 2 1 X86_64_RELOC_UNSIGNED 0 l_c
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/variable-exprs.s b/test/MC/MachO/variable-exprs.s
index 5369622d8cfe..380e7e75cc75 100644
--- a/test/MC/MachO/variable-exprs.s
+++ b/test/MC/MachO/variable-exprs.s
@@ -110,8 +110,8 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:     0x2C 0 2 0 GENERIC_RELOC_VANILLA 0 __data
 // CHECK-I386:     0x28 0 2 0 GENERIC_RELOC_VANILLA 0 __data
 // CHECK-I386:     0x24 0 2 1 GENERIC_RELOC_VANILLA 0 d3
-// CHECK-I386:     0x20 0 2 1 GENERIC_RELOC_VANILLA 0 d2
-// CHECK-I386:     0x1C 0 2 1 GENERIC_RELOC_VANILLA 0 d
+// CHECK-I386:     0x20 0 2 1 GENERIC_RELOC_VANILLA 0 d{{$}}
+// CHECK-I386:     0x1C 0 2 1 GENERIC_RELOC_VANILLA 0 d{{$}}
 // CHECK-I386:     0x18 0 2 n/a GENERIC_RELOC_VANILLA 1 0x5
 // CHECK-I386:     0x14 0 2 0 GENERIC_RELOC_VANILLA 0 __data
 // CHECK-I386:     0x10 0 2 0 GENERIC_RELOC_VANILLA 0 __data
@@ -319,8 +319,8 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:     0x2C 0 2 1 X86_64_RELOC_UNSIGNED 0 g
 // CHECK-X86_64:     0x28 0 2 1 X86_64_RELOC_UNSIGNED 0 f
 // CHECK-X86_64:     0x24 0 2 1 X86_64_RELOC_UNSIGNED 0 d3
-// CHECK-X86_64:     0x20 0 2 1 X86_64_RELOC_UNSIGNED 0 d2
-// CHECK-X86_64:     0x1C 0 2 1 X86_64_RELOC_UNSIGNED 0 d
+// CHECK-X86_64:     0x20 0 2 1 X86_64_RELOC_UNSIGNED 0 d{{$}}
+// CHECK-X86_64:     0x1C 0 2 1 X86_64_RELOC_UNSIGNED 0 d{{$}}
 // CHECK-X86_64:     0x18 0 2 1 X86_64_RELOC_UNSIGNED 0 a
 // CHECK-X86_64:     0x14 0 2 1 X86_64_RELOC_UNSIGNED 0 e
 // CHECK-X86_64:     0x10 0 2 1 X86_64_RELOC_UNSIGNED 0 b
diff --git a/test/MC/Mips/dsp/invalid.s b/test/MC/Mips/dsp/invalid.s
index 1d50b829985c..f58a44560c54 100644
--- a/test/MC/Mips/dsp/invalid.s
+++ b/test/MC/Mips/dsp/invalid.s
@@ -31,8 +31,8 @@
   shilo $ac1, -64          # CHECK: :[[@LINE]]:15: error: expected 6-bit signed immediate
   repl.qb $2, -1           # CHECK: :[[@LINE]]:15: error: expected 8-bit unsigned immediate
   repl.qb $2, 256          # CHECK: :[[@LINE]]:15: error: expected 8-bit unsigned immediate
-  repl.ph $2, -1           # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
-  repl.ph $2, 1024         # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+  repl.ph $2, -513         # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
+  repl.ph $2, 512          # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
   rddsp $2, -1             # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
   rddsp $2, 1024           # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
   wrdsp $5, -1             # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
diff --git a/test/MC/Mips/micromips-dsp/invalid.s b/test/MC/Mips/micromips-dsp/invalid.s
index 8e6fedbf0b94..05fc77440d3e 100644
--- a/test/MC/Mips/micromips-dsp/invalid.s
+++ b/test/MC/Mips/micromips-dsp/invalid.s
@@ -1,6 +1,8 @@
 # RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=micromips -mattr=+dsp 2>%t1
 # RUN: FileCheck %s < %t1
 
+  repl.ph $2, -513         # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
+  repl.ph $2, 512          # CHECK: :[[@LINE]]:15: error: expected 10-bit signed immediate
   shll.ph $3, $4, 16       # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
   shll.ph $3, $4, -1       # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
   shll_s.ph $3, $4, 16     # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
diff --git a/test/MC/Mips/micromips-dsp/valid.s b/test/MC/Mips/micromips-dsp/valid.s
index d1f5d0f3ae8d..ed279f3eb539 100644
--- a/test/MC/Mips/micromips-dsp/valid.s
+++ b/test/MC/Mips/micromips-dsp/valid.s
@@ -95,7 +95,7 @@
   mtlo $1, $ac1                # CHECK: mtlo $1, $ac1                # encoding: [0x00,0x01,0x70,0x7c]
   raddu.w.qb $1, $2            # CHECK: raddu.w.qb $1, $2       # encoding: [0x00,0x22,0xf1,0x3c]
   rddsp $1, 2                  # CHECK: rddsp $1, 2             # encoding: [0x00,0x20,0x86,0x7c]
-  repl.ph $1, 512              # CHECK: repl.ph $1, 512         # encoding: [0x02,0x00,0x08,0x3d]
+  repl.ph $1, 2                # CHECK: repl.ph $1, 2           # encoding: [0x00,0x02,0x08,0x3d]
   repl.qb $1, 128              # CHECK: repl.qb $1, 128         # encoding: [0x00,0x30,0x05,0xfc]
   replv.ph $1, $2              # CHECK: replv.ph $1, $2         # encoding: [0x00,0x22,0x03,0x3c]
   replv.qb $1, $2              # CHECK: replv.qb $1, $2         # encoding: [0x00,0x22,0x13,0x3c]
diff --git a/test/MC/WebAssembly/reloc-code.ll b/test/MC/WebAssembly/reloc-code.ll
index 5c794400fa09..5fcd9b403811 100644
--- a/test/MC/WebAssembly/reloc-code.ll
+++ b/test/MC/WebAssembly/reloc-code.ll
@@ -36,16 +36,6 @@ entry:
 ; CHECK-NEXT:       Addend: 0
 ; CHECK-NEXT:     }
 ; CHECK-NEXT:     Relocation {
-; CHECK-NEXT:       Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
-; CHECK-NEXT:       Offset: 0x2D
-; CHECK-NEXT:       Index: 0x0
-; CHECK-NEXT:     }
-; CHECK-NEXT:     Relocation {
-; CHECK-NEXT:       Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
-; CHECK-NEXT:       Offset: 0x34
-; CHECK-NEXT:       Index: 0x1
-; CHECK-NEXT:     }
-; CHECK-NEXT:     Relocation {
 ; CHECK-NEXT:       Type: R_WEBASSEMBLY_TYPE_INDEX_LEB (6)
 ; CHECK-NEXT:       Offset: 0x1A
 ; CHECK-NEXT:       Index: 0x1
@@ -55,5 +45,15 @@ entry:
 ; CHECK-NEXT:       Offset: 0x24
 ; CHECK-NEXT:       Index: 0x0
 ; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
+; CHECK-NEXT:       Offset: 0x2D
+; CHECK-NEXT:       Index: 0x0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB (0)
+; CHECK-NEXT:       Offset: 0x34
+; CHECK-NEXT:       Index: 0x1
+; CHECK-NEXT:     }
 ; CHECK-NEXT:   }
 ; CHECK-NEXT: ]
diff --git a/test/Object/AMDGPU/elf-definitions.yaml b/test/Object/AMDGPU/elf-definitions.yaml
index 819786aa1902..07fe8c62dc47 100644
--- a/test/Object/AMDGPU/elf-definitions.yaml
+++ b/test/Object/AMDGPU/elf-definitions.yaml
@@ -3,15 +3,12 @@
 
 # CHECK: Format: ELF64-amdgpu-hsacobj
 # CHECK: Arch: amdgcn
-# CHECK:  Machine: EM_AMDGPU (0xE0)
-# CHECK: Sections [
-# CHECK: Section {
-# CHECK: Name: .shf_amdgpu
-# CHECK: Flags [ (0xF00000)
-# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000)
-# CHECK: SHF_AMDGPU_HSA_CODE (0x400000)
-# CHECK: SHF_AMDGPU_HSA_GLOBAL (0x100000)
-# CHECK: SHF_AMDGPU_HSA_READONLY (0x200000)
+# CHECK: ElfHeader {
+# CHECK:   Ident {
+# CHECK:     OS/ABI: AMDGPU_HSA (0x40)
+# CHECK:     ABIVersion: 0
+# CHECK:   }
+# CHECK:   Machine: EM_AMDGPU (0xE0)
 # CHECK: }
 
 --- !ELF
@@ -21,10 +18,4 @@ FileHeader:
   Type:    ET_REL
   Machine: EM_AMDGPU
   OSABI:   ELFOSABI_AMDGPU_HSA
-
-Sections:
-  - Name:  .shf_amdgpu
-    Type:  SHT_PROGBITS
-    Flags: [ SHF_AMDGPU_HSA_GLOBAL, SHF_AMDGPU_HSA_READONLY,
-             SHF_AMDGPU_HSA_CODE, SHF_AMDGPU_HSA_AGENT]
 ...
diff --git a/test/Object/objc-imageinfo-coff.ll b/test/Object/objc-imageinfo-coff.ll
new file mode 100644
index 000000000000..017d5ac003c9
--- /dev/null
+++ b/test/Object/objc-imageinfo-coff.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple x86_64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s
+; REQUIRES: x86-registered-target
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !".objc_imageinfo$B"}
+!3 = !{i32 1, !"Objective-C Garbage Collection", i32 2}
+
+; CHECK: .section .objc_imageinfo$B,"dr"
+; CHECK: OBJC_IMAGE_INFO:
+; CHECK:   .long 0
+; CHECK:   .long 2
+
diff --git a/test/Object/objc-imageinfo-elf.ll b/test/Object/objc-imageinfo-elf.ll
new file mode 100644
index 000000000000..f7484fa39be1
--- /dev/null
+++ b/test/Object/objc-imageinfo-elf.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple x86_64-unknown-linux-gnu -filetype asm -o - %s | FileCheck %s
+; REQUIRES: x86-registered-target
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"objc_imageinfo"}
+!3 = !{i32 1, !"Objective-C Garbage Collection", i32 2}
+
+; CHECK: .section objc_imageinfo
+; CHECK: OBJC_IMAGE_INFO:
+; CHECK:   .long 0
+; CHECK:   .long 2
+
diff --git a/test/Object/objc-imageinfo-macho.ll b/test/Object/objc-imageinfo-macho.ll
new file mode 100644
index 000000000000..97c36699e5df
--- /dev/null
+++ b/test/Object/objc-imageinfo-macho.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple x86_64-apple-ios -filetype asm -o - %s | FileCheck %s
+; REQUIRES: x86-registered-target
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA,__objc_imageinfo,regular,no_dead_strip"}
+!3 = !{i32 1, !"Objective-C Garbage Collection", i32 2}
+
+; CHECK: .section __DATA,__objc_imageinfo,regular,no_dead_strip
+; CHECK: L_OBJC_IMAGE_INFO:
+; CHECK:   .long 0
+; CHECK:   .long 2
+
diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
new file mode 100644
index 000000000000..328e8cc2907f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -0,0 +1,337 @@
+; RUN: opt -S -codegenprepare -mtriple=i686-unknown-unknown < %s   | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
+
+define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp2(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+  ret i32 %call
+}
+
+define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp3(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+  ret i32 %call
+}
+
+define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp4(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+  ret i32 %call
+}
+
+define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp5(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+  ret i32 %call
+}
+
+define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp6(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+  ret i32 %call
+}
+
+define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp7(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+  ret i32 %call
+}
+
+define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp8(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+  ret i32 %call
+}
+
+define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp9(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+  ret i32 %call
+}
+
+define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp10(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+  ret i32 %call
+}
+
+define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp11(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+  ret i32 %call
+}
+
+define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp12(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+  ret i32 %call
+}
+
+define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp13(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+  ret i32 %call
+}
+
+define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp14(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+  ret i32 %call
+}
+
+define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp15(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+  ret i32 %call
+}
+
+define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp16(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+; ALL-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+  ret i32 %call
+}
+
+define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq2(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq3(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq4(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq5(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq6(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq7(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq8(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq9(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq10(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq11(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq12(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq13(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq14(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq15(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y)  {
+; ALL-LABEL: @cmp_eq16(
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; ALL-NEXT:    ret i32 [[CONV]]
+;
+  %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
+  %cmp = icmp eq i32 %call, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
diff --git a/test/Transforms/ConstProp/sse.ll b/test/Transforms/ConstProp/sse.ll
index cc37c96c1ff1..ad0a62e42062 100644
--- a/test/Transforms/ConstProp/sse.ll
+++ b/test/Transforms/ConstProp/sse.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -constprop -S | FileCheck %s
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 
 define i1 @test_sse_cvts_exact() nounwind readnone {
 ; CHECK-LABEL: @test_sse_cvts_exact(
diff --git a/test/Transforms/DCE/calls-errno.ll b/test/Transforms/DCE/calls-errno.ll
index 22ea04aa8f36..415caae0fe60 100644
--- a/test/Transforms/DCE/calls-errno.ll
+++ b/test/Transforms/DCE/calls-errno.ll
@@ -72,6 +72,10 @@ entry:
 ; CHECK-NEXT: %cos2 = call double @cos(double 0x7FF0000000000000)
   %cos2 = call double @cos(double 0x7FF0000000000000)
 
+; A nobuiltin call to cos(0) may have side effects, so it must be kept
+; CHECK-NEXT: %cos3 = call double @cos(double 0.000000e+00)
+  %cos3 = call double @cos(double 0.000000e+00) nobuiltin
+
 ; pow(0, 1) is 0
   %pow1 = call double @pow(double 0x7FF0000000000000, double 1.000000e+00)
 
diff --git a/test/Transforms/GVNSink/sink-common-code.ll b/test/Transforms/GVNSink/sink-common-code.ll
index d9e757cd10fc..02b1eb7fe259 100644
--- a/test/Transforms/GVNSink/sink-common-code.ll
+++ b/test/Transforms/GVNSink/sink-common-code.ll
@@ -54,33 +54,36 @@ if.end:
 
 declare i32 @foo(i32, i32) nounwind readnone
 
-define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
-entry:
-  br i1 %flag, label %if.then, label %if.else
-
-if.then:
-  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
-  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
-  br label %if.end
-
-if.else:
-  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
-  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
-  br label %if.end
-
-if.end:
-  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
-  %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
-  %ret = add i32 %xx, %yy
-  ret i32 %ret
-}
-
-; CHECK-LABEL: test3
-; CHECK: select
-; CHECK: call
-; CHECK: call
-; CHECK: add
-; CHECK-NOT: br
+; FIXME: The test fails when the original order of the
+; candidates with the same cost is preserved.
+;
+;define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
+;entry:
+;  br i1 %flag, label %if.then, label %if.else
+;
+;if.then:
+;  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+;  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+;  br label %if.end
+;
+;if.else:
+;  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+;  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+;  br label %if.end
+;
+;if.end:
+;  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+;  %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
+;  %ret = add i32 %xx, %yy
+;  ret i32 %ret
+;}
+;
+; -CHECK-LABEL: test3
+; -CHECK: select
+; -CHECK: call
+; -CHECK: call
+; -CHECK: add
+; -CHECK-NOT: br
 
 define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
 entry:
diff --git a/test/Transforms/IRCE/correct-loop-info.ll b/test/Transforms/IRCE/correct-loop-info.ll
new file mode 100644
index 000000000000..3c26b47f154f
--- /dev/null
+++ b/test/Transforms/IRCE/correct-loop-info.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -irce < %s -S | FileCheck %s
+
+; REQUIRES: asserts
+
+; IRCE creates the pre and post loops, and then canonicalizes these loops
+; into LCSSA and loop-simplify form. Make sure that the update to the LoopInfo does not
+; incorrectly change the header while canonicalizing these pre/post loops. We
+; were incorrectly updating LI when the split loop was a subloop, as in the case below.
+source_filename = "correct-loop-info.ll"
+
+define void @baz() personality i32* ()* @ham {
+; CHECK-LABEL: @baz(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[OUTERHEADER:%.*]]
+; CHECK:       outerheader:
+; CHECK-NEXT:    [[TMP:%.*]] = icmp slt i32 undef, 84
+; CHECK-NEXT:    br i1 [[TMP]], label [[BB2:%.*]], label [[BB16:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br i1 false, label [[INNERHEADER_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]]
+; CHECK:       innerheader.preloop.preheader:
+; CHECK-NEXT:    br label [[INNERHEADER_PRELOOP:%.*]]
+; CHECK:       mainloop:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], -1
+; CHECK-NEXT:    br i1 [[TMP0]], label [[INNERHEADER_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+; CHECK:       innerheader.preheader:
+; CHECK-NEXT:    br label [[INNERHEADER:%.*]]
+; CHECK:       innerheader:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[TMP6:%.*]], [[BB8:%.*]] ], [ [[TMP4_PRELOOP_COPY:%.*]], [[INNERHEADER_PREHEADER]] ]
+; CHECK-NEXT:    invoke void @pluto()
+; CHECK-NEXT:    to label [[BB5:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit.split-lp
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6]] = add i32 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 0
+; CHECK-NEXT:    br i1 true, label [[BB8]], label [[EXIT3_LOOPEXIT5:%.*]]
+; CHECK:       bb8:
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i32 [[TMP6]], 84
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP6]], -1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[INNERHEADER]], label [[MAIN_EXIT_SELECTOR:%.*]]
+; CHECK:       main.exit.selector:
+; CHECK-NEXT:    [[TMP6_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[BB8]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt i32 [[TMP6_LCSSA]], 84
+; CHECK-NEXT:    br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[BB13:%.*]]
+; CHECK:       main.pseudo.exit:
+; CHECK-NEXT:    [[TMP4_COPY:%.*]] = phi i32 [ [[TMP4_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+; CHECK:       outer_exiting.loopexit:
+; CHECK-NEXT:    [[LPAD_LOOPEXIT:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    br label [[OUTER_EXITING:%.*]]
+; CHECK:       outer_exiting.loopexit.split-lp.loopexit:
+; CHECK-NEXT:    [[LPAD_LOOPEXIT2:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    br label %outer_exiting.loopexit.split-lp
+; CHECK:       outer_exiting.loopexit.split-lp.loopexit.split-lp:
+; CHECK-NEXT:    %lpad.loopexit.split-lp3 = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    br label %outer_exiting.loopexit.split-lp
+; CHECK:       outer_exiting.loopexit.split-lp:
+; CHECK-NEXT:    br label [[OUTER_EXITING]]
+; CHECK:       outer_exiting:
+; CHECK-NEXT:    switch i32 undef, label [[EXIT2:%.*]] [
+; CHECK-NEXT:    i32 142, label [[BB14:%.*]]
+; CHECK-NEXT:    i32 448, label [[EXIT:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       exit3.loopexit:
+; CHECK-NEXT:    br label [[EXIT3:%.*]]
+; CHECK:       exit3.loopexit4:
+; CHECK-NEXT:    br label [[EXIT3]]
+; CHECK:       exit3.loopexit5:
+; CHECK-NEXT:    br label [[EXIT3]]
+; CHECK:       exit3:
+; CHECK-NEXT:    ret void
+; CHECK:       bb13.loopexit:
+; CHECK-NEXT:    br label [[BB13]]
+; CHECK:       bb13:
+; CHECK-NEXT:    unreachable
+; CHECK:       bb14:
+; CHECK-NEXT:    br label [[OUTERHEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+; CHECK:       bb16:
+; CHECK-NEXT:    ret void
+; CHECK:       exit2:
+; CHECK-NEXT:    ret void
+; CHECK:       innerheader.preloop:
+; CHECK-NEXT:    [[TMP4_PRELOOP:%.*]] = phi i32 [ [[TMP6_PRELOOP:%.*]], [[BB8_PRELOOP:%.*]] ], [ undef, [[INNERHEADER_PRELOOP_PREHEADER]] ]
+; CHECK-NEXT:    invoke void @pluto()
+; CHECK-NEXT:    to label [[BB5_PRELOOP:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT:%.*]]
+; CHECK:       bb5.preloop:
+; CHECK-NEXT:    [[TMP6_PRELOOP]] = add i32 [[TMP4_PRELOOP]], 1
+; CHECK-NEXT:    [[TMP7_PRELOOP:%.*]] = icmp ult i32 [[TMP6_PRELOOP]], 0
+; CHECK-NEXT:    br i1 [[TMP7_PRELOOP]], label [[BB8_PRELOOP]], label [[EXIT3_LOOPEXIT:%.*]]
+; CHECK:       bb8.preloop:
+; CHECK-NEXT:    [[TMP9_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 84
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], -1
+; CHECK-NEXT:    br i1 [[TMP3]], label [[INNERHEADER_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop !0, !irce.loop.clone !5
+; CHECK:       preloop.exit.selector:
+; CHECK-NEXT:    [[TMP6_PRELOOP_LCSSA:%.*]] = phi i32 [ [[TMP6_PRELOOP]], [[BB8_PRELOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP6_PRELOOP_LCSSA]], 84
+; CHECK-NEXT:    br i1 [[TMP4]], label [[PRELOOP_PSEUDO_EXIT]], label [[BB13]]
+; CHECK:       preloop.pseudo.exit:
+; CHECK-NEXT:    [[TMP4_PRELOOP_COPY]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    [[INDVAR_END]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ]
+; CHECK-NEXT:    br label [[MAINLOOP]]
+; CHECK:       postloop:
+; CHECK-NEXT:    br label [[INNERHEADER_POSTLOOP:%.*]]
+; CHECK:       innerheader.postloop:
+; CHECK-NEXT:    [[TMP4_POSTLOOP:%.*]] = phi i32 [ [[TMP6_POSTLOOP:%.*]], [[BB8_POSTLOOP:%.*]] ], [ [[TMP4_COPY]], [[POSTLOOP]] ]
+; CHECK-NEXT:    invoke void @pluto()
+; CHECK-NEXT:    to label [[BB5_POSTLOOP:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit
+; CHECK:       bb5.postloop:
+; CHECK-NEXT:    [[TMP6_POSTLOOP]] = add i32 [[TMP4_POSTLOOP]], 1
+; CHECK-NEXT:    [[TMP7_POSTLOOP:%.*]] = icmp ult i32 [[TMP6_POSTLOOP]], 0
+; CHECK-NEXT:    br i1 [[TMP7_POSTLOOP]], label [[BB8_POSTLOOP]], label [[EXIT3_LOOPEXIT4:%.*]]
+; CHECK:       bb8.postloop:
+; CHECK-NEXT:    [[TMP9_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 84
+; CHECK-NEXT:    br i1 [[TMP9_POSTLOOP]], label [[INNERHEADER_POSTLOOP]], label [[BB13_LOOPEXIT:%.*]], !llvm.loop !6, !irce.loop.clone !5
+;
+bb:
+  br label %outerheader
+
+outerheader:                                              ; preds = %bb14, %bb
+  %tmp = icmp slt i32 undef, 84
+  br i1 %tmp, label %bb2, label %bb16
+
+bb2:                                              ; preds = %outerheader
+  br label %innerheader
+
+innerheader:                                              ; preds = %bb8, %bb2
+  %tmp4 = phi i32 [ %tmp6, %bb8 ], [ undef, %bb2 ]
+  invoke void @pluto()
+  to label %bb5 unwind label %outer_exiting
+
+bb5:                                              ; preds = %innerheader
+  %tmp6 = add i32 %tmp4, 1
+  %tmp7 = icmp ult i32 %tmp6, 0
+  br i1 %tmp7, label %bb8, label %exit3
+
+bb8:                                              ; preds = %bb5
+  %tmp9 = icmp slt i32 %tmp6, 84
+  br i1 %tmp9, label %innerheader, label %bb13
+
+outer_exiting:                                             ; preds = %innerheader
+  %tmp11 = landingpad { i8*, i32 }
+  cleanup
+  switch i32 undef, label %exit2 [
+  i32 142, label %bb14
+  i32 448, label %exit
+  ]
+
+exit3:                                             ; preds = %bb5
+  ret void
+
+bb13:                                             ; preds = %bb8
+  unreachable
+
+bb14:                                             ; preds = %outer_exiting
+  br label %outerheader
+
+exit:                                             ; preds = %outer_exiting
+  ret void
+
+bb16:                                             ; preds = %outerheader
+  ret void
+
+exit2:                                             ; preds = %outer_exiting
+  ret void
+}
+
+declare i32* @ham()
+
+declare void @pluto()
+
+!0 = distinct !{!0, !1, !2, !3, !4}
+!1 = !{!"llvm.loop.unroll.disable"}
+!2 = !{!"llvm.loop.vectorize.enable", i1 false}
+!3 = !{!"llvm.loop.licm_versioning.disable"}
+!4 = !{!"llvm.loop.distribute.enable", i1 false}
+!5 = !{}
+!6 = distinct !{!6, !1, !2, !3, !4}
diff --git a/test/Transforms/IndVarSimplify/lftr_disabled.ll b/test/Transforms/IndVarSimplify/lftr_disabled.ll
new file mode 100644
index 000000000000..c647d123dd75
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr_disabled.ll
@@ -0,0 +1,28 @@
+; The computation of i*i must not be eliminated by LFTR here,
+; because LFTR is disabled (-disable-lftr).
+; RUN: opt < %s -indvars -dce -disable-lftr -S | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+
+@A = external global i32                ; <i32*> [#uses=1]
+
+define i32 @quadratic_setlt() {
+; CHECK-LABEL: @quadratic_setlt(
+; CHECK: mul
+entry:
+        br label %loop
+
+loop:           ; preds = %loop, %entry
+        %i = phi i32 [ 7, %entry ], [ %i.next, %loop ]          ; <i32> [#uses=5]
+        %i.next = add i32 %i, 1         ; <i32> [#uses=1]
+        store i32 %i, i32* @A
+        %i2 = mul i32 %i, %i            ; <i32> [#uses=1]
+        %c = icmp slt i32 %i2, 1000             ; <i1> [#uses=1]
+        br i1 %c, label %loop, label %loopexit
+
+loopexit:               ; preds = %loop
+        ret i32 %i
+}
+
diff --git a/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll b/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
new file mode 100644
index 000000000000..1b3240620571
--- /dev/null
+++ b/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -infer-address-spaces %s | FileCheck %s
+
+%struct.S = type { [5 x i32] }
+
+$g1 = comdat any
+
+@g1 = linkonce_odr addrspace(3) global %struct.S zeroinitializer, comdat, align 4
+
+; CHECK-LABEL: @foo(
+; CHECK:  %x0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2
+; CHECK:  %idxprom.i = zext i32 %x0 to i64
+; CHECK:  %arrayidx.i = getelementptr %struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 %idxprom.i
+; CHECK:  tail call void @f1(i32* %arrayidx.i, i32 undef) #0
+; CHECK:  %x1 = load i32, i32* getelementptr (%struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 0), align 4
+; CHECK:  %L.sroa.0.0.insert.ext.i = zext i32 %x1 to i64
+; CHECK:  tail call void @f2(i64* null, i64 %L.sroa.0.0.insert.ext.i) #0
+; CHECK:  ret void
+define void @foo() local_unnamed_addr #0 {
+entry:
+  %x0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2
+  %idxprom.i = zext i32 %x0 to i64
+  %arrayidx.i = getelementptr %struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 %idxprom.i
+  tail call void @f1(i32* %arrayidx.i, i32 undef) #0
+  %x1 = load i32, i32* getelementptr (%struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 0), align 4
+  %L.sroa.0.0.insert.ext.i = zext i32 %x1 to i64
+  tail call void @f2(i64* null, i64 %L.sroa.0.0.insert.ext.i) #0
+  ret void
+}
+
+declare void @f1(i32*, i32) local_unnamed_addr #0
+declare void @f2(i64*, i64) local_unnamed_addr #0
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index b98644cd2dd4..f34ed0841132 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -91,3 +91,27 @@ define i32 @test() {
   ret i32 %e
 ; CHECK: }
 }
+
+; Inliner shouldn't delete calls it can't inline, even if they're trivially dead
+; CHECK-LABEL: @outer4(
+define void @outer4(void ()* %inner4) {
+entry:
+; CHECK: call void %inner4()
+  call void %inner4() nounwind readnone
+  ret void
+}
+
+declare void @inner5_inner()
+
+define void @inner5(void ()* %x) {
+  call void %x() nounwind readnone
+  ret void
+}
+
+; Inliner shouldn't delete calls it can't inline, even if they're trivially dead and temporarily indirect
+; CHECK-LABEL: @outer5(
+define void @outer5() {
+; CHECK: call void @inner5_inner(
+  call void @inner5(void ()* @inner5_inner)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/constant-fold-libfunc.ll b/test/Transforms/InstCombine/constant-fold-libfunc.ll
new file mode 100644
index 000000000000..c969b65a4e74
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-libfunc.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare double @acos(double)
+
+; Check that functions without any function attributes are simplified.
+
+define double @test_simplify_acos() {
+; CHECK-LABEL: @test_simplify_acos
+  %pi = call double @acos(double -1.000000e+00)
+; CHECK-NOT: call double @acos
+; CHECK: ret double 0x400921FB54442D18
+  ret double %pi
+}
+
+define double @test_acos_nobuiltin() {
+; CHECK-LABEL: @test_acos_nobuiltin
+  %pi = call double @acos(double -1.000000e+00) nobuiltin 
+; CHECK: call double @acos(double -1.000000e+00)
+  ret double %pi
+}
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll
index 29f774c5f62b..fb25c2342798 100644
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -260,3 +260,26 @@ bb2:
   %ins2 = insertelement <4 x float> %ins1, float %ext1, i32 3
   ret <4 x float> %ins2
 }
+
+; Don't insert extractelements from the wider vector before the def of the index operand.
+
+define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) {
+; CHECK-LABEL: @extractelt_insertion(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[Y:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]]
+; CHECK-NEXT:    [[E:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[RET:%.*]] = select i1 [[E]], <4 x i32> [[B]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <4 x i32> [[RET]]
+;
+entry:
+  %a = extractelement <2 x i32> %x, i32 1
+  %b = insertelement <4 x i32> zeroinitializer, i32 %a, i64 3
+  %c = add i32 %y, 3
+  %d = extractelement <2 x i32> %x, i32 %c
+  %e = icmp eq i32 %d, 0
+  %ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
+  ret <4 x i32> %ret
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 78c98955353e..1b1ed606868f 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -21,6 +21,7 @@ declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
 declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
 declare double @llvm.cos.f64(double %Val) nounwind readonly
 declare double @llvm.sin.f64(double %Val) nounwind readonly
 declare double @llvm.floor.f64(double %Val) nounwind readonly
@@ -282,6 +283,16 @@ define i32 @cttz(i32 %a) {
   ret i32 %count
 }
 
+define <2 x i32> @cttz_vec(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_vec(
+; CHECK-NEXT:    ret <2 x i32> <i32 3, i32 3>
+;
+  %or = or <2 x i32> %a, <i32 8, i32 8>
+  %and = and <2 x i32> %or, <i32 -8, i32 -8>
+  %count = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %and, i1 true) nounwind readnone
+  ret <2 x i32> %count
+}
+
 define i1 @cttz_knownbits(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits(
 ; CHECK-NEXT:    ret i1 false
@@ -292,6 +303,16 @@ define i1 @cttz_knownbits(i32 %arg) {
   ret i1 %res
 }
 
+define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits_vec(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %or = or <2 x i32> %arg, <i32 4, i32 4>
+  %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+  %res = icmp eq <2 x i32> %cnt, <i32 4, i32 4>
+  ret <2 x i1> %res
+}
+
 define i1 @cttz_knownbits2(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits2(
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG:%.*]], 4
@@ -305,6 +326,19 @@ define i1 @cttz_knownbits2(i32 %arg) {
   ret i1 %res
 }
 
+define <2 x i1> @cttz_knownbits2_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits2_vec(
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 2, i32 2>
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
+  %or = or <2 x i32> %arg, <i32 4, i32 4>
+  %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+  %res = icmp eq <2 x i32> %cnt, <i32 2, i32 2>
+  ret <2 x i1> %res
+}
+
 ; TODO: The icmp is unnecessary given the known bits of the input.
 define i1 @cttz_knownbits3(i32 %arg) {
 ; CHECK-LABEL: @cttz_knownbits3(
@@ -319,6 +353,20 @@ define i1 @cttz_knownbits3(i32 %arg) {
   ret i1 %res
 }
 
+; TODO: The icmp is unnecessary given the known bits of the input.
+define <2 x i1> @cttz_knownbits3_vec(<2 x i32> %arg) {
+; CHECK-LABEL: @cttz_knownbits3_vec(
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
+  %or = or <2 x i32> %arg, <i32 4, i32 4>
+  %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone
+  %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+  ret <2 x i1> %res
+}
+
 define i8 @ctlz(i8 %a) {
 ; CHECK-LABEL: @ctlz(
 ; CHECK-NEXT:    ret i8 2
@@ -329,6 +377,16 @@ define i8 @ctlz(i8 %a) {
   ret i8 %count
 }
 
+define <2 x i8> @ctlz_vec(<2 x i8> %a) {
+; CHECK-LABEL: @ctlz_vec(
+; CHECK-NEXT:    ret <2 x i8> <i8 2, i8 2>
+;
+  %or = or <2 x i8> %a, <i8 32, i8 32>
+  %and = and <2 x i8> %or, <i8 63, i8 63>
+  %count = tail call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %and, i1 true) nounwind readnone
+  ret <2 x i8> %count
+}
+
 define i1 @ctlz_knownbits(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits(
 ; CHECK-NEXT:    ret i1 false
@@ -339,6 +397,16 @@ define i1 @ctlz_knownbits(i8 %arg) {
   ret i1 %res
 }
 
+define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits_vec(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %or = or <2 x i8> %arg, <i8 32, i8 32>
+  %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+  %res = icmp eq <2 x i8> %cnt, <i8 4, i8 4>
+  ret <2 x i1> %res
+}
+
 define i1 @ctlz_knownbits2(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits2(
 ; CHECK-NEXT:    [[OR:%.*]] = or i8 [[ARG:%.*]], 32
@@ -352,6 +420,19 @@ define i1 @ctlz_knownbits2(i8 %arg) {
   ret i1 %res
 }
 
+define <2 x i1> @ctlz_knownbits2_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits2_vec(
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true)
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], <i8 2, i8 2>
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
+  %or = or <2 x i8> %arg, <i8 32, i8 32>
+  %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+  %res = icmp eq <2 x i8> %cnt, <i8 2, i8 2>
+  ret <2 x i1> %res
+}
+
 ; TODO: The icmp is unnecessary given the known bits of the input.
 define i1 @ctlz_knownbits3(i8 %arg) {
 ; CHECK-LABEL: @ctlz_knownbits3(
@@ -366,6 +447,20 @@ define i1 @ctlz_knownbits3(i8 %arg) {
   ret i1 %res
 }
 
+; TODO: The icmp is unnecessary given the known bits of the input.
+define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) {
+; CHECK-LABEL: @ctlz_knownbits3_vec(
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32>
+; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true)
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i1> [[RES]]
+;
+  %or = or <2 x i8> %arg, <i8 32, i8 32>
+  %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone
+  %res = icmp eq <2 x i8> %cnt, <i8 3, i8 3>
+  ret <2 x i1> %res
+}
+
 define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
   %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
   %lz.cmp = icmp eq i32 %lz, 32
@@ -406,7 +501,7 @@ define <2 x i1> @cttz_cmp_vec(<2 x i32> %a) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> %a, zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
-  %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind readnone
+  %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) nounwind readnone
   %cmp = icmp ne <2 x i32> %x, <i32 32, i32 32>
   ret <2 x i1> %cmp
 }
@@ -434,6 +529,14 @@ define i32 @ctlz_undef(i32 %Value) {
   ret i32 %ctlz
 }
 
+define <2 x i32> @ctlz_undef_vec(<2 x i32> %Value) {
+; CHECK-LABEL: @ctlz_undef_vec(
+; CHECK-NEXT:    ret <2 x i32> undef
+;
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> zeroinitializer, i1 true)
+  ret <2 x i32> %ctlz
+}
+
 define i32 @ctlz_make_undef(i32 %a) {
   %or = or i32 %a, 8
   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
@@ -444,13 +547,31 @@ define i32 @ctlz_make_undef(i32 %a) {
 ; CHECK-NEXT: ret i32 %ctlz
 }
 
+define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_make_undef_vec(
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %or = or <2 x i32> %a, <i32 8, i32 8>
+  %ctlz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %or, i1 false)
+  ret <2 x i32> %ctlz
+}
+
 define i32 @cttz_undef(i32 %Value) nounwind {
 ; CHECK-LABEL: @cttz_undef(
 ; CHECK-NEXT:    ret i32 undef
 ;
   %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
   ret i32 %cttz
+}
 
+define <2 x i32> @cttz_undef_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @cttz_undef_vec(
+; CHECK-NEXT:    ret <2 x i32> undef
+;
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> zeroinitializer, i1 true)
+  ret <2 x i32> %cttz
 }
 
 define i32 @cttz_make_undef(i32 %a) {
@@ -463,6 +584,17 @@ define i32 @cttz_make_undef(i32 %a) {
 ; CHECK-NEXT: ret i32 %cttz
 }
 
+define <2 x i32> @cttz_make_undef_vec(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_make_undef_vec(
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[CTTZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %or = or <2 x i32> %a, <i32 8, i32 8>
+  %cttz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 false)
+  ret <2 x i32> %cttz
+}
+
 define i32 @ctlz_select(i32 %Value) nounwind {
 ; CHECK-LABEL: @ctlz_select(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 %Value, i1 false)
@@ -472,7 +604,17 @@ define i32 @ctlz_select(i32 %Value) nounwind {
   %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)
   %s = select i1 %tobool, i32 %ctlz, i32 32
   ret i32 %s
+}
 
+define <2 x i32> @ctlz_select_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @ctlz_select_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false)
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %tobool = icmp ne <2 x i32> %Value, zeroinitializer
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %Value, i1 true)
+  %s = select <2 x i1> %tobool, <2 x i32> %ctlz, <2 x i32> <i32 32, i32 32>
+  ret <2 x i32> %s
 }
 
 define i32 @cttz_select(i32 %Value) nounwind {
@@ -484,7 +626,17 @@ define i32 @cttz_select(i32 %Value) nounwind {
   %cttz = call i32 @llvm.cttz.i32(i32 %Value, i1 true)
   %s = select i1 %tobool, i32 %cttz, i32 32
   ret i32 %s
+}
 
+define <2 x i32> @cttz_select_vec(<2 x i32> %Value) nounwind {
+; CHECK-LABEL: @cttz_select_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[VALUE:%.*]], i1 false)
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %tobool = icmp ne <2 x i32> %Value, zeroinitializer
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %Value, i1 true)
+  %s = select <2 x i1> %tobool, <2 x i32> %cttz, <2 x i32> <i32 32, i32 32>
+  ret <2 x i32> %s
 }
 
 define i1 @overflow_div_add(i32 %v1, i32 %v2) nounwind {
diff --git a/test/Transforms/InstCombine/lshr.ll b/test/Transforms/InstCombine/lshr.ll
index 0cad7f833ab6..71b25177162b 100644
--- a/test/Transforms/InstCombine/lshr.ll
+++ b/test/Transforms/InstCombine/lshr.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
+target datalayout = "e-m:e-i64:64-n8:16:32:64"
+
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
@@ -100,12 +102,9 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
   ret <2 x i8> %lshr
 }
 
-; FIXME: The bool bit got smeared across a wide val, but then we zero'd out those bits. This is just a zext.
-
 define i16 @bool_zext(i1 %x) {
 ; CHECK-LABEL: @bool_zext(
-; CHECK-NEXT:    [[SEXT:%.*]] = sext i1 %x to i16
-; CHECK-NEXT:    [[HIBIT:%.*]] = lshr i16 [[SEXT]], 15
+; CHECK-NEXT:    [[HIBIT:%.*]] = zext i1 %x to i16
 ; CHECK-NEXT:    ret i16 [[HIBIT]]
 ;
   %sext = sext i1 %x to i16
@@ -115,8 +114,7 @@ define i16 @bool_zext(i1 %x) {
 
 define <2 x i8> @bool_zext_splat(<2 x i1> %x) {
 ; CHECK-LABEL: @bool_zext_splat(
-; CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> %x to <2 x i8>
-; CHECK-NEXT:    [[HIBIT:%.*]] = lshr <2 x i8> [[SEXT]], <i8 7, i8 7>
+; CHECK-NEXT:    [[HIBIT:%.*]] = zext <2 x i1> %x to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[HIBIT]]
 ;
   %sext = sext <2 x i1> %x to <2 x i8>
@@ -148,23 +146,34 @@ define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) {
   ret <2 x i8> %hibit
 }
 
-; FIXME: All of the replicated sign bits are wiped out by the lshr. This could be lshr+zext.
-
-define i16 @fake_sext(i3 %x) {
+define i18 @fake_sext(i3 %x) {
 ; CHECK-LABEL: @fake_sext(
-; CHECK-NEXT:    [[SEXT:%.*]] = sext i3 %x to i16
-; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[SEXT]], 15
-; CHECK-NEXT:    ret i16 [[SH]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i3 %x, 2
+; CHECK-NEXT:    [[SH:%.*]] = zext i3 [[TMP1]] to i18
+; CHECK-NEXT:    ret i18 [[SH]]
 ;
-  %sext = sext i3 %x to i16
-  %sh = lshr i16 %sext, 15
-  ret i16 %sh
+  %sext = sext i3 %x to i18
+  %sh = lshr i18 %sext, 17
+  ret i18 %sh
+}
+
+; Avoid the transform if it would change the shift from a legal to illegal type.
+
+define i32 @fake_sext_but_should_not_change_type(i3 %x) {
+; CHECK-LABEL: @fake_sext_but_should_not_change_type(
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i3 %x to i32
+; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[SEXT]], 31
+; CHECK-NEXT:    ret i32 [[SH]]
+;
+  %sext = sext i3 %x to i32
+  %sh = lshr i32 %sext, 31
+  ret i32 %sh
 }
 
 define <2 x i8> @fake_sext_splat(<2 x i3> %x) {
 ; CHECK-LABEL: @fake_sext_splat(
-; CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i3> %x to <2 x i8>
-; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[SEXT]], <i8 7, i8 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i3> %x, <i3 2, i3 2>
+; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i3> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %sext = sext <2 x i3> %x to <2 x i8>
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index 68daac65ee6b..c7d10e251b4a 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -199,6 +199,16 @@ define i256 @test_cttz() {
   ret i256 %x
 }
 
+declare <2 x i256> @llvm.cttz.v2i256(<2 x i256> %src, i1 %is_zero_undef)
+
+define <2 x i256> @test_cttz_vec() {
+; CHECK-LABEL: @test_cttz_vec(
+; CHECK-NEXT:    ret <2 x i256> <i256 1, i256 1>
+;
+  %x = call <2 x i256> @llvm.cttz.v2i256(<2 x i256> <i256 10, i256 10>, i1 false)
+  ret <2 x i256> %x
+}
+
 declare i256 @llvm.ctpop.i256(i256 %src)
 
 define i256 @test_ctpop() {
@@ -410,3 +420,26 @@ define <8 x i32> @masked_load_undef_mask(<8 x i32>* %V) {
 declare noalias i8* @malloc(i64)
 
 declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
+
+declare double @llvm.powi.f64(double, i32)
+declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32)
+
+define double @constant_fold_powi() nounwind uwtable ssp {
+; CHECK-LABEL: @constant_fold_powi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 9.000000e+00
+;
+entry:
+  %0 = call double @llvm.powi.f64(double 3.00000e+00, i32 2)
+  ret double %0
+}
+
+define <2 x double> @constant_fold_powi_vec() nounwind uwtable ssp {
+; CHECK-LABEL: @constant_fold_powi_vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <2 x double> <double 9.000000e+00, double 2.500000e+01>
+;
+entry:
+  %0 = call <2 x double> @llvm.powi.v2f64(<2 x double> <double 3.00000e+00, double 5.00000e+00>, i32 2)
+  ret <2 x double> %0
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 20ebd36991a5..2fe079019161 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -69,7 +69,7 @@ define i1 @gep4() {
 
 define i1 @PR31262() {
 ; CHECK-LABEL: @PR31262(
-; CHECK-NEXT:    ret i1 icmp uge (i32* getelementptr ([1 x i32], [1 x i32]* @a, i64 0, i64 undef), i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0))
+; CHECK-NEXT:    ret i1 icmp uge (i32* getelementptr ([1 x i32], [1 x i32]* @a, i32 0, i32 undef), i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0))
 ;
   %idx = getelementptr inbounds [1 x i32], [1 x i32]* @a, i64 0, i64 undef
   %cmp = icmp uge i32* %idx, getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0)
diff --git a/test/Transforms/InstSimplify/simplify-nested-bitcast.ll b/test/Transforms/InstSimplify/simplify-nested-bitcast.ll
new file mode 100644
index 000000000000..b7ee79415a22
--- /dev/null
+++ b/test/Transforms/InstSimplify/simplify-nested-bitcast.ll
@@ -0,0 +1,54 @@
+; RUN: opt -always-inline -S %s | FileCheck %s
+%0 = type { i64, i64, i8 addrspace(1)*, i8 addrspace(1)* }
+%__aaa_struct = type { { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* }, %0, [17 x i8], { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* }, %0, [18 x i8] }
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* }
+
+@__aaa_struct_ptr = external addrspace(1) global %__aaa_struct
+@__aaa_const_init = constant %__aaa_struct { { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* } { i8** null, i32 1342177280, i32 0, i8* bitcast (i32 (i8 addrspace(4)*, i32 addrspace(1)*)* @bl0_block_invoke to i8*), %struct.__block_descriptor addrspace(1)* bitcast (%0 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 1) to %struct.__block_descriptor addrspace(1)*) }, %0 { i64 0, i64 32, i8 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 2, i32 0), i8 addrspace(1)* null }, [17 x i8] c"bl0_block_invoke\00", { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* } { i8** null, i32 1342177280, i32 0, i8* bitcast (i32 (i8 addrspace(4)*, i32 addrspace(1)*)* @__f1_block_invoke to i8*), %struct.__block_descriptor addrspace(1)* bitcast (%0 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 4) to %struct.__block_descriptor addrspace(1)*) }, %0 { i64 0, i64 32, i8 addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 5, i32 0), i8 addrspace(1)* null }, [18 x i8] c"__f1_block_invoke\00" }
+
+; Function Attrs: alwaysinline norecurse nounwind readonly
+define i32 @bl0_block_invoke(i8 addrspace(4)* nocapture readnone, i32 addrspace(1)* nocapture readonly) #0 {
+entry:
+  %2 = load i32, i32 addrspace(1)* %1, align 4
+  %mul = shl nsw i32 %2, 1
+  ret i32 %mul
+}
+
+; Function Attrs: alwaysinline nounwind
+define i32 @f0(i32 addrspace(1)*, i32 (i32 addrspace(1)*) addrspace(4)*) #1 {
+entry:
+  %block.literal = bitcast i32 (i32 addrspace(1)*) addrspace(4)* %1 to %struct.__block_literal_generic addrspace(4)*
+  %2 = getelementptr inbounds %struct.__block_literal_generic, %struct.__block_literal_generic addrspace(4)* %block.literal, i64 0, i32 3
+  %3 = bitcast i32 (i32 addrspace(1)*) addrspace(4)* %1 to i8 addrspace(4)*
+  %4 = bitcast i8* addrspace(4)* %2 to i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)*
+  %5 = load i32 (i8 addrspace(4)*, i32 addrspace(1)*)*, i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)* %4, align 8
+  %call = tail call i32 %5(i8 addrspace(4)* %3, i32 addrspace(1)* %0) #2
+  ret i32 %call
+}
+
+; CHECK-LABEL: define void @f1
+; CHECK: %1 = load i32 (i8 addrspace(4)*, i32 addrspace(1)*)*, i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)* bitcast (i8* addrspace(4)* getelementptr (%__aaa_struct, %__aaa_struct addrspace(4)* addrspacecast (%__aaa_struct addrspace(1)* @__aaa_struct_ptr to %__aaa_struct addrspace(4)*), i64 0, i32 0, i32 3) to i32 (i8 addrspace(4)*, i32 addrspace(1)*)* addrspace(4)*), align 8
+
+; Function Attrs: alwaysinline nounwind
+define void @f1(i32 addrspace(1)*) #1 {
+entry:
+  %call = tail call i32 @f0(i32 addrspace(1)* %0, i32 (i32 addrspace(1)*) addrspace(4)* addrspacecast (i32 (i32 addrspace(1)*) addrspace(1)* bitcast (%__aaa_struct addrspace(1)* @__aaa_struct_ptr to i32 (i32 addrspace(1)*) addrspace(1)*) to i32 (i32 addrspace(1)*) addrspace(4)*)) #3
+  store i32 %call, i32 addrspace(1)* %0, align 4
+  %call1 = tail call i32 @f0(i32 addrspace(1)* %0, i32 (i32 addrspace(1)*) addrspace(4)* addrspacecast (i32 (i32 addrspace(1)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(1)* } addrspace(1)* getelementptr inbounds (%__aaa_struct, %__aaa_struct addrspace(1)* @__aaa_struct_ptr, i32 0, i32 3) to i32 (i32 addrspace(1)*) addrspace(1)*) to i32 (i32 addrspace(1)*) addrspace(4)*)) #3
+  store i32 %call1, i32 addrspace(1)* %0, align 4
+  ret void
+}
+
+; Function Attrs: alwaysinline norecurse nounwind readonly
+define i32 @__f1_block_invoke(i8 addrspace(4)* nocapture readnone, i32 addrspace(1)* nocapture readonly) #0 {
+entry:
+  %2 = load i32, i32 addrspace(1)* %1, align 4
+  %add = add nsw i32 %2, 1
+  ret i32 %add
+}
+
+attributes #0 = { alwaysinline norecurse nounwind readonly }
+attributes #1 = { alwaysinline nounwind }
+attributes #2 = { nobuiltin nounwind }
+attributes #3 = { nobuiltin }
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index b8e61a05cc0c..cdf4732d4b5e 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -51,7 +51,7 @@ define <4 x i8*> @test5() {
   ret <4 x i8*> %gep
 
 ; CHECK-LABEL: @test5
-; CHECK-NEXT: ret <4 x i8*> getelementptr (i8, <4 x i8*> <i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 4 to i8*)>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: ret <4 x i8*> getelementptr (i8, <4 x i8*> <i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 4 to i8*)>, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
 }
 
 @v = global [24 x [42 x [3 x i32]]] zeroinitializer, align 16
diff --git a/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll b/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
index bf2009e28a7d..1f444b3748a5 100644
--- a/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
+++ b/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -mtriple=x86_64-pc-linux  -mattr=+avx -interleaved-access -S | FileCheck %s
 
-; This file tests the function `llvm::lowerInterleavedLoad`.
+; This file tests the function `llvm::lowerInterleavedLoad/Store`.
 
 define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
 ; CHECK-LABEL: @load_factorf64_4(
@@ -102,4 +102,63 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
   ret <4 x double> %mul
 }
 
+define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4(
+; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+  ret void
+}
+
+define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
+; CHECK-LABEL: @store_factori64_4(
+; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+; CHECK-NEXT:    store <16 x i64> [[INTERLEAVED_VEC]], <16 x i64>* [[PTR:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+  store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
+  ret void
+}
+
+define void @store_factorf64_4_revMask(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4_revMask(
+; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
+; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+  ret void
+}
+
+define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double> %v0, <16 x double> %v1, <16 x double> %v2, <16 x double> %v3) {
+; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
+; CHECK-NEXT:    [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
+; CHECK-NEXT:    store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %s1 = shufflevector <16 x double> %v2, <16 x double> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %interleaved.vec = shufflevector <32 x double> %s0, <32 x double> %s1, <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
+  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+  ret void
+}
 
diff --git a/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
new file mode 100644
index 000000000000..ec93847178b5
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll
@@ -0,0 +1,452 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+;; memcpy.atomic formation (atomic load & store)
+define void @test1(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test1(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+  %V = load atomic i8, i8* %I.0.014 unordered, align 1
+  store atomic i8 %V, i8* %DestI unordered, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation (atomic store, normal load)
+define void @test2(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+  %V = load i8, i8* %I.0.014, align 1
+  store atomic i8 %V, i8* %DestI unordered, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (atomic store, normal load w/ no align)
+define void @test2b(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2b(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+  %V = load i8, i8* %I.0.014
+  store atomic i8 %V, i8* %DestI unordered, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
+define void @test2c(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2c(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i32, i32 10000
+  %Dest = alloca i32, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+  %V = load i32, i32* %I.0.014, align 2
+  store atomic i32 %V, i32* %DestI unordered, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
+define void @test2d(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test2d(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i32, i32 10000
+  %Dest = alloca i32, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+  %V = load i32, i32* %I.0.014, align 4
+  store atomic i32 %V, i32* %DestI unordered, align 2
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+
+;; memcpy.atomic formation (normal store, atomic load)
+define void @test3(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 1 %Dest, i8* align 1 %Base, i64 %Size, i32 1)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+  %V = load atomic i8, i8* %I.0.014 unordered, align 1
+  store i8 %V, i8* %DestI, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (normal store w/ no align, atomic load)
+define void @test3b(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3b(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+  %V = load atomic i8, i8* %I.0.014 unordered, align 1
+  store i8 %V, i8* %DestI
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
+define void @test3c(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3c(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i32, i32 10000
+  %Dest = alloca i32, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+  %V = load atomic i32, i32* %I.0.014 unordered, align 2
+  store i32 %V, i32* %DestI, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
+define void @test3d(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test3d(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i32, i32 10000
+  %Dest = alloca i32, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+  %V = load atomic i32, i32* %I.0.014 unordered, align 4
+  store i32 %V, i32* %DestI, align 2
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+
+;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
+define void @test4(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test4(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+  %V = load atomic i8, i8* %I.0.014 unordered, align 1
+  store atomic i8 %V, i8* %DestI monotonic, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
+define void @test5(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test5(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i8, i32 10000
+  %Dest = alloca i8, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+  %V = load atomic i8, i8* %I.0.014 monotonic, align 1
+  store atomic i8 %V, i8* %DestI unordered, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 2
+define void @test6(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test6(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 2 %Dest{{[0-9]*}}, i8* align 2 %Base{{[0-9]*}}, i64 %Size, i32 2)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i16, i32 10000
+  %Dest = alloca i16, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
+  %DestI = getelementptr i16, i16* %Dest, i64 %indvar
+  %V = load atomic i16, i16* %I.0.014 unordered, align 2
+  store atomic i16 %V, i16* %DestI unordered, align 2
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 4
+define void @test7(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test7(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 4 %Dest{{[0-9]*}}, i8* align 4 %Base{{[0-9]*}}, i64 %Size, i32 4)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i32, i32 10000
+  %Dest = alloca i32, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+  %V = load atomic i32, i32* %I.0.014 unordered, align 4
+  store atomic i32 %V, i32* %DestI unordered, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 8
+define void @test8(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test8(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 8 %Dest{{[0-9]*}}, i8* align 8 %Base{{[0-9]*}}, i64 %Size, i32 8)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i64, i32 10000
+  %Dest = alloca i64, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i64, i64* %Base, i64 %indvar
+  %DestI = getelementptr i64, i64* %Dest, i64 %indvar
+  %V = load atomic i64, i64* %I.0.014 unordered, align 8
+  store atomic i64 %V, i64* %DestI unordered, align 8
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation (atomic load & store) -- element size 16
+define void @test9(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test9(
+; CHECK: call void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* align 16 %Dest{{[0-9]*}}, i8* align 16 %Base{{[0-9]*}}, i64 %Size, i32 16)
+; CHECK-NOT: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i128, i32 10000
+  %Dest = alloca i128, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i128, i128* %Base, i64 %indvar
+  %DestI = getelementptr i128, i128* %Dest, i64 %indvar
+  %V = load atomic i128, i128* %I.0.014 unordered, align 16
+  store atomic i128 %V, i128* %DestI unordered, align 16
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
+define void @test10(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test10(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i256, i32 10000
+  %Dest = alloca i256, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i256, i256* %Base, i64 %indvar
+  %DestI = getelementptr i256, i256* %Dest, i64 %indvar
+  %V = load atomic i256, i256* %I.0.014 unordered, align 32
+  store atomic i256 %V, i256* %DestI unordered, align 32
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+
+
+; Make sure that atomic memset doesn't get recognized by mistake
+define void @test_nomemset(i8* %Base, i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test_nomemset(
+; CHECK-NOT: call void @llvm.memset
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+  store atomic i8 0, i8* %I.0.014 unordered, align 1
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Verify that unordered memset_pattern isn't recognized.
+; This is a replica of test11_pattern from basic.ll
+define void @test_nomemset_pattern(i32* nocapture %P) nounwind ssp {
+; CHECK-LABEL: @test_nomemset_pattern(
+; CHECK-NEXT: entry:
+; CHECK-NOT: bitcast
+; CHECK-NOT: memset_pattern
+; CHECK: store atomic
+; CHECK: ret void
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
+  %arrayidx = getelementptr i32, i32* %P, i64 %indvar
+  store atomic i32 1, i32* %arrayidx unordered, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
new file mode 100644
index 000000000000..b2528f1c2457
--- /dev/null
+++ b/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll
@@ -0,0 +1,28 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+;; memcpy.atomic formation rejection (atomic load & store) -- element size 2
+;;  Will not create call due to a max element size of 0
+define void @test1(i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test1(
+; CHECK-NOT: call void @llvm.memcpy.element.atomic
+; CHECK: store
+; CHECK: ret void
+bb.nph:
+  %Base = alloca i16, i32 10000
+  %Dest = alloca i16, i32 10000
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
+  %DestI = getelementptr i16, i16* %Dest, i64 %indvar
+  %V = load atomic i16, i16* %I.0.014 unordered, align 2
+  store atomic i16 %V, i16* %DestI unordered, align 2
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/canonical.ll b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
index 2dafbb408aad..6b6acb868745 100644
--- a/test/Transforms/LoopStrengthReduce/X86/canonical.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s
 ; Check LSR formula canonicalization will put loop invariant regs before
 ; induction variable of current loop, so exprs involving loop invariant regs
 ; can be promoted outside of current loop.
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index fb63b66137f3..7c01432914ff 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -163,7 +163,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; X64: movzbl -3(
 ;
 ; X32: foldedidx:
-; X32: movzbl -3(
+; X32: movzbl 400(
 define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
 entry:
   br label %for.body
@@ -275,7 +275,7 @@ exit:
 ;
 ; X32: @testCmpZero
 ; X32: %for.body82.us
-; X32: dec
+; X32: cmp
 ; X32: jne
 define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
 entry:
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
index a7731bfcec56..deca954fea78 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
@@ -1,4 +1,4 @@
-; REQUIRES: x86
+; REQUIRES: x86-registered-target
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 
 ; Strength reduction analysis here relies on IV Users analysis, that
@@ -22,16 +22,16 @@ target triple = "x86_64-apple-macosx"
 ; CHECK-LABEL: @test2
 ; CHECK-LABEL: test2.loop:
 ; CHECK:  %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
-; CHECK:  %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
-; CHECK:  %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK:  %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ]
+; CHECK:  %lsr.iv.next = add nsw i32 %lsr.iv, -1
 ; CHECK:  %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
 ;
 ; CHECK-LABEL: for.end:
-; CHECK:  %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK:  %tobool.us = icmp eq i32 %lsr.iv.next, 0
 ; CHECK:  %sub.us = select i1 %tobool.us, i32 0, i32 0
-; CHECK:  %1 = sub i32 0, %sub.us
-; CHECK:  %2 = add i32 %1, %lsr.iv.next
-; CHECK:  %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK:  %0 = sub i32 0, %sub.us
+; CHECK:  %1 = sub i32 %0, %lsr.iv.next
+; CHECK:  %sext.us = mul i32 %lsr.iv.next2, %1
 ; CHECK:  %f = ashr i32 %sext.us, 24
 ; CHECK: ret i32 %f
 define i32 @test2() {
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
index 4888536bdf81..7f163500a737 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64  -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
 ; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
 
 ; OPT test checks that LSR optimize compare for static counter to compare with 0.
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
index 3273cb4e6b5b..239cc0233506 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
 ; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
 
 ; OPT checks that LSR prefers less instructions to less registers.
diff --git a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
index b563eb3ad994..e05d5aa3027b 100644
--- a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 ; Check when we use an outerloop induction variable inside of an innerloop
 ; induction value expr, LSR can still choose to use single induction variable
@@ -22,18 +23,21 @@ for.body:                                         ; preds = %for.inc, %entry
 for.body2.preheader:                              ; preds = %for.body
   br label %for.body2
 
-; Check LSR only generates one induction variable for for.body2 and the induction
-; variable will be shared by multiple array accesses.
+; Check LSR only generates two induction variables for for.body2: one for the compare and
+; one to be shared by multiple array accesses.
 ; CHECK: for.body2:
-; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ 0, %for.body2.preheader ]
+; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
+; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
 ; CHECK-NOT:  = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
-; CHECK:      [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* %maxarray, i64 [[LSR]]
-; CHECK:      [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* [[SCEVGEP1]], i64 1
+; CHECK:      [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
+; CHECK:      [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
+; CHECK:      {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
+; CHECK:      [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
 ; CHECK:      {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
-; CHECK:      [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
-; CHECK:      {{.*}} = load i8, i8* [[SCEVGEP3]], align 1
-; CHECK:      [[SCEVGEP4:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
-; CHECK:      store i8 {{.*}}, i8* [[SCEVGEP4]], align 1
+; CHECK:      [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
+; CHECK:      store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
+; CHECK:      [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
+; CHECK:      %exitcond = icmp ne i64 [[LSRNEXT]], 0
 ; CHECK:      br i1 %exitcond, label %for.body2, label %for.inc.loopexit
 
 for.body2:                                        ; preds = %for.body2.preheader, %for.body2
diff --git a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index d06e3fdba39c..1149afe7b9f4 100644
--- a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -5,7 +5,7 @@ target triple = "aarch64"
 
 ; CHECK-LABEL: @add_a(
 ; CHECK: load <16 x i8>, <16 x i8>*
-; CHECK: add nuw nsw <16 x i8>
+; CHECK: add <16 x i8>
 ; CHECK: store <16 x i8>
 ; Function Attrs: nounwind
 define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
@@ -31,9 +31,37 @@ for.body:                                         ; preds = %entry, %for.body
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
+; Ensure that we preserve nuw/nsw if we're not shrinking the values we're
+; working with.
+; CHECK-LABEL: @add_a1(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_a1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+entry:
+  %cmp8 = icmp sgt i32 %len, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %0 = load i8, i8* %arrayidx
+  %add = add nuw nsw i8 %0, 2
+  %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  store i8 %add, i8* %arrayidx3
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %len
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
 ; CHECK-LABEL: @add_b(
 ; CHECK: load <8 x i16>, <8 x i16>*
-; CHECK: add nuw nsw <8 x i16>
+; CHECK: add <8 x i16>
 ; CHECK: store <8 x i16>
 ; Function Attrs: nounwind
 define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
@@ -61,7 +89,7 @@ for.body:                                         ; preds = %entry, %for.body
 
 ; CHECK-LABEL: @add_c(
 ; CHECK: load <8 x i8>, <8 x i8>*
-; CHECK: add nuw nsw <8 x i16>
+; CHECK: add <8 x i16>
 ; CHECK: store <8 x i16>
 ; Function Attrs: nounwind
 define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
@@ -116,12 +144,12 @@ for.body:                                         ; preds = %entry, %for.body
 ; CHECK-LABEL: @add_e(
 ; CHECK: load <16 x i8>
 ; CHECK: shl <16 x i8>
-; CHECK: add nuw nsw <16 x i8>
+; CHECK: add <16 x i8>
 ; CHECK: or <16 x i8>
-; CHECK: mul nuw nsw <16 x i8>
+; CHECK: mul <16 x i8>
 ; CHECK: and <16 x i8>
 ; CHECK: xor <16 x i8>
-; CHECK: mul nuw nsw <16 x i8>
+; CHECK: mul <16 x i8>
 ; CHECK: store <16 x i8>
 define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
 entry:
@@ -162,12 +190,12 @@ for.body:                                         ; preds = %for.body, %for.body
 ; CHECK: load <8 x i16>
 ; CHECK: trunc <8 x i16>
 ; CHECK: shl <8 x i8>
-; CHECK: add nsw <8 x i8>
+; CHECK: add <8 x i8>
 ; CHECK: or <8 x i8>
-; CHECK: mul nuw nsw <8 x i8>
+; CHECK: mul <8 x i8>
 ; CHECK: and <8 x i8>
 ; CHECK: xor <8 x i8>
-; CHECK: mul nuw nsw <8 x i8>
+; CHECK: mul <8 x i8>
 ; CHECK: store <8 x i8>
 define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
 entry:
diff --git a/test/Transforms/LowerExpectIntrinsic/PR33346.ll b/test/Transforms/LowerExpectIntrinsic/PR33346.ll
new file mode 100644
index 000000000000..ca962fbdc8f3
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/PR33346.ll
@@ -0,0 +1,22 @@
+; RUN: opt -lower-expect -S < %s
+; RUN: opt -passes='function(lower-expect)' -S < %s
+
+define i64 @foo(i64 %arg) #0 {
+bb:
+  %tmp = alloca i64, align 8
+  store i64 %arg, i64* %tmp, align 8
+  %tmp1 = load i64, i64* %tmp, align 8
+  %tmp2 = load i64, i64* %tmp, align 8
+  %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2)
+  ret i64 %tmp3
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.expect.i64(i64, i64)
+
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304723)"}
diff --git a/test/Transforms/LowerTypeTests/simple.ll b/test/Transforms/LowerTypeTests/simple.ll
index cedfcb4a63a0..aae17c05d606 100644
--- a/test/Transforms/LowerTypeTests/simple.ll
+++ b/test/Transforms/LowerTypeTests/simple.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -S -lowertypetests < %s | FileCheck %s
-; RUN: opt -S -lowertypetests -mtriple=x86_64-apple-macosx10.8.0 < %s | FileCheck -check-prefix=CHECK-DARWIN %s
+; RUN: opt -S -lowertypetests -mtriple=x86_64-apple-macosx10.8.0 < %s | FileCheck %s
 ; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s
 
 target datalayout = "e-p:32:32"
@@ -39,20 +39,6 @@ target datalayout = "e-p:32:32"
 ; CHECK: @c = protected alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
 ; CHECK: @d = alias [2 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
 
-; CHECK-DARWIN: @aptr = constant i32* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G:@[^ ]*]], i32 0, i32 0)
-@aptr = constant i32* @a
-
-; CHECK-DARWIN: @bptr = constant [63 x i32]* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
-@bptr = constant [63 x i32]* @b
-
-; CHECK-DARWIN: @cptr = constant i32* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
-@cptr = constant i32* @c
-
-; CHECK-DARWIN: @dptr = constant [2 x i32]* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
-@dptr = constant [2 x i32]* @d
-
-; CHECK-DARWIN: [[G]] = private constant
-
 ; CHECK: @bits{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
 ; CHECK: @bits.{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
 
diff --git a/test/Transforms/LowerTypeTests/simplify.ll b/test/Transforms/LowerTypeTests/simplify.ll
new file mode 100644
index 000000000000..cb5ad4a10bfb
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/simplify.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=import -lowertypetests-read-summary=%S/Inputs/import.yaml < %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: define i1 @bytearray7(i8* [[p:%.*]])
+define i1 @bytearray7(i8* %p) {
+  ; CHECK-NEXT: [[pi:%.*]] = ptrtoint i8* [[p]] to i64
+  ; CHECK-NEXT: [[sub:%.*]] = sub i64 [[pi]], ptrtoint (i8* @__typeid_bytearray7_global_addr to i64)
+  ; CHECK-NEXT: [[lshr:%.*]] = lshr i64 [[sub]], zext (i8 ptrtoint (i8* @__typeid_bytearray7_align to i8) to i64)
+  ; CHECK-NEXT: [[shl:%.*]] = shl i64 [[sub]], zext (i8 sub (i8 64, i8 ptrtoint (i8* @__typeid_bytearray7_align to i8)) to i64)
+  ; CHECK-NEXT: [[or:%.*]] = or i64 [[lshr]], [[shl]]
+  ; CHECK-NEXT: [[ule:%.*]] = icmp ule i64 [[or]], ptrtoint (i8* @__typeid_bytearray7_size_m1 to i64)
+  ; CHECK-NEXT: br i1 [[ule]], label %[[t1:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t1]]:
+  ; CHECK-NEXT: [[gep:%.*]] = getelementptr i8, i8* @__typeid_bytearray7_byte_array, i64 [[or]]
+  ; CHECK-NEXT: [[load:%.*]] = load i8, i8* [[gep]]
+  ; CHECK-NEXT: [[and:%.*]] = and i8 [[load]], ptrtoint (i8* @__typeid_bytearray7_bit_mask to i8)
+  ; CHECK-NEXT: [[ne:%.*]] = icmp ne i8 [[and]], 0
+  ; CHECK-NEXT: br i1 [[ne]], label %[[t:.*]], label %[[f:.*]]
+
+  ; CHECK: [[t]]:
+  ; CHECK-NEXT: ret i1 true
+
+  ; CHECK: [[f]]:
+  ; CHECK-NEXT: ret i1 false
+  %x = call i1 @llvm.type.test(i8* %p, metadata !"bytearray7")
+  br i1 %x, label %t, label %f
+
+t:
+  ret i1 true
+
+f:
+  ret i1 false
+}
diff --git a/test/Transforms/NewGVN/completeness.ll b/test/Transforms/NewGVN/completeness.ll
index 2b28f12df9d1..1798bfea5fe0 100644
--- a/test/Transforms/NewGVN/completeness.ll
+++ b/test/Transforms/NewGVN/completeness.ll
@@ -395,7 +395,7 @@ define void @test10() {
 ; CHECK:       g:
 ; CHECK-NEXT:    [[N:%.*]] = phi i32* [ [[H:%.*]], [[I:%.*]] ], [ null, [[B:%.*]] ]
 ; CHECK-NEXT:    [[H]] = getelementptr i32, i32* [[N]], i64 1
-; CHECK-NEXT:    [[J:%.*]] = icmp eq i32* [[H]], getelementptr (i32, i32* null, i64 8)
+; CHECK-NEXT:    [[J:%.*]] = icmp eq i32* [[H]], inttoptr (i64 32 to i32*)
 ; CHECK-NEXT:    br i1 [[J]], label [[C:%.*]], label [[I]]
 ; CHECK:       i:
 ; CHECK-NEXT:    br i1 undef, label [[K:%.*]], label [[G]]
diff --git a/test/Transforms/NewGVN/loadforward.ll b/test/Transforms/NewGVN/loadforward.ll
index d66b5332601f..b4cbcc6b0f4d 100644
--- a/test/Transforms/NewGVN/loadforward.ll
+++ b/test/Transforms/NewGVN/loadforward.ll
@@ -9,8 +9,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ;; Test that we forward the first store to the second load
 define i16 @bazinga() {
 ; CHECK-LABEL: @bazinga(
-; CHECK-NEXT:    [[_TMP10:%.*]] = load i16, i16* getelementptr inbounds (%rec11, %rec11* @str, i16 0, i32 1)
-; CHECK-NEXT:    store i16 [[_TMP10]], i16* getelementptr inbounds (%rec11, %rec11* @str, i16 0, i32 0)
+; CHECK-NEXT:    [[_TMP10:%.*]] = load i16, i16* getelementptr inbounds (%rec11, %rec11* @str, i64 0, i32 1)
+; CHECK-NEXT:    store i16 [[_TMP10]], i16* getelementptr inbounds (%rec11, %rec11* @str, i64 0, i32 0)
 ; CHECK-NEXT:    [[_TMP15:%.*]] = icmp eq i16 [[_TMP10]], 3
 ; CHECK-NEXT:    [[_TMP16:%.*]] = select i1 [[_TMP15]], i16 1, i16 0
 ; CHECK-NEXT:    br label [[BB1:%.*]]
diff --git a/test/Transforms/NewGVN/pr32403.ll b/test/Transforms/NewGVN/pr32403.ll
index 505d31a9463e..2552e0e66ab9 100644
--- a/test/Transforms/NewGVN/pr32403.ll
+++ b/test/Transforms/NewGVN/pr32403.ll
@@ -17,8 +17,7 @@ define void @reorder_ref_pic_list() local_unnamed_addr {
 ; CHECK-NEXT:    [[INC_I:%.*]] = add nsw i32 [[REFIDXLX_0]], 1
 ; CHECK-NEXT:    br label [[FOR_BODY8_I:%.*]]
 ; CHECK:       for.body8.i:
-; CHECK-NEXT:    [[NIDX_052_I:%.*]] = phi i32 [ [[INC_I]], [[IF_THEN13]] ], [ [[NIDX_052_I]], [[FOR_INC24_I:%.*]] ]
-; CHECK-NEXT:    br i1 undef, label [[FOR_INC24_I]], label [[IF_THEN17_I:%.*]]
+; CHECK-NEXT:    br i1 undef, label [[FOR_INC24_I:%.*]], label [[IF_THEN17_I:%.*]]
 ; CHECK:       if.then17.i:
 ; CHECK-NEXT:    br label [[FOR_INC24_I]]
 ; CHECK:       for.inc24.i:
diff --git a/test/Transforms/NewGVN/pr32897.ll b/test/Transforms/NewGVN/pr32897.ll
index eb19aa367b72..dcf2af30b239 100644
--- a/test/Transforms/NewGVN/pr32897.ll
+++ b/test/Transforms/NewGVN/pr32897.ll
@@ -7,7 +7,6 @@ define void @tinkywinky(i64* %b) {
 ; CHECK-NEXT:    br label [[BODY:%.*]]
 ; CHECK:       body:
 ; CHECK-NEXT:    store i64 undef, i64* [[B:%.*]]
-; CHECK-NEXT:    [[B2:%.*]] = load i64, i64* [[B]]
 ; CHECK-NEXT:    br i1 undef, label [[BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    br label [[BODY]]
diff --git a/test/Transforms/NewGVN/pr33187.ll b/test/Transforms/NewGVN/pr33187.ll
new file mode 100644
index 000000000000..61e767d36569
--- /dev/null
+++ b/test/Transforms/NewGVN/pr33187.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;; Ensure we don't change after value numbering by accidentally deleting the wrong expression.
+; RUN: opt -newgvn -S %s | FileCheck %s
+define void @fn1() local_unnamed_addr #0 {
+; CHECK-LABEL: @fn1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND_PREHEADER:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    br label [[FOR_COND_PREHEADER]]
+; CHECK:       for.cond.preheader:
+; CHECK-NEXT:    [[H_031:%.*]] = phi i32 [ 5, [[ENTRY:%.*]] ], [ [[H_127:%.*]], [[WHILE_COND:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[H_128:%.*]] = phi i32 [ [[H_031]], [[FOR_COND_PREHEADER]] ], [ [[H_2:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    br label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br i1 false, label [[L_LOOPEXIT:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 undef, label [[FOR_INC]], label [[IF_END9:%.*]]
+; CHECK:       if.end9:
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[H_2]] = phi i32 [ [[H_128]], [[IF_END]] ], [ 0, [[IF_END9]] ]
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND10_LOOPEXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       while.cond10.loopexit:
+; CHECK-NEXT:    br label [[WHILE_COND10:%.*]]
+; CHECK:       while.cond10:
+; CHECK-NEXT:    [[H_127]] = phi i32 [ [[H_126:%.*]], [[IF_END18:%.*]] ], [ [[H_125:%.*]], [[L:%.*]] ], [ [[H_2]], [[WHILE_COND10_LOOPEXIT]] ]
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND]], label [[WHILE_BODY12:%.*]]
+; CHECK:       while.body12:
+; CHECK-NEXT:    br i1 undef, label [[IF_END18]], label [[L]]
+; CHECK:       L.loopexit:
+; CHECK-NEXT:    store i8 undef, i8* null
+; CHECK-NEXT:    br label [[L]]
+; CHECK:       L:
+; CHECK-NEXT:    [[H_125]] = phi i32 [ [[H_127]], [[WHILE_BODY12]] ], [ undef, [[L_LOOPEXIT]] ]
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND10]], label [[IF_END18]]
+; CHECK:       if.end18:
+; CHECK-NEXT:    [[H_126]] = phi i32 [ [[H_125]], [[L]] ], [ [[H_127]], [[WHILE_BODY12]] ]
+; CHECK-NEXT:    br label [[WHILE_COND10]]
+;
+entry:
+  br label %for.cond.preheader
+
+while.cond:                                       ; preds = %while.cond10
+  br label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %while.cond, %entry
+  %h.031 = phi i32 [ 5, %entry ], [ %h.127, %while.cond ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %for.cond.preheader
+  %h.128 = phi i32 [ %h.031, %for.cond.preheader ], [ %h.2, %for.inc ]
+  br label %if.then
+
+if.then:                                          ; preds = %for.body
+  br i1 false, label %L.loopexit, label %if.end
+
+if.end:                                           ; preds = %if.then
+  br i1 undef, label %for.inc, label %if.end9
+
+if.end9:                                          ; preds = %if.end
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end9, %if.end
+  %h.2 = phi i32 [ %h.128, %if.end ], [ 0, %if.end9 ]
+  br i1 undef, label %while.cond10.loopexit, label %for.body
+
+while.cond10.loopexit:                            ; preds = %for.inc
+  %h.2.lcssa = phi i32 [ %h.2, %for.inc ]
+  br label %while.cond10
+
+while.cond10:                                     ; preds = %if.end18, %L, %while.cond10.loopexit
+  %h.127 = phi i32 [ %h.126, %if.end18 ], [ %h.125, %L ], [ %h.2.lcssa, %while.cond10.loopexit ]
+  br i1 undef, label %while.cond, label %while.body12
+
+while.body12:                                     ; preds = %while.cond10
+  br i1 undef, label %if.end18, label %L
+
+L.loopexit:                                       ; preds = %if.then
+  br label %L
+
+L:                                                ; preds = %L.loopexit, %while.body12
+  %h.125 = phi i32 [ %h.127, %while.body12 ], [ undef, %L.loopexit ]
+  br i1 undef, label %while.cond10, label %if.end18
+
+if.end18:                                         ; preds = %L, %while.body12
+  %h.126 = phi i32 [ %h.125, %L ], [ %h.127, %while.body12 ]
+  br label %while.cond10
+}
+
+
+define void @hoge() local_unnamed_addr #0 {
+; CHECK-LABEL: @hoge(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[TMP2:%.*]], [[BB1]] ]
+; CHECK-NEXT:    [[TMP2]] = add nuw nsw i64 [[TMP]], 1
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i64 [ 0, %bb ], [ %tmp2, %bb1 ]
+  %tmp2 = add nuw nsw i64 %tmp, 1
+  br label %bb1
+}
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
+source_filename = "pr33187-c.ll"
+
+define void @a() {
+; CHECK-LABEL: @a(
+; CHECK-NEXT:  b:
+; CHECK-NEXT:    store i8* null, i8** null
+; CHECK-NEXT:    br label [[D:%.*]]
+; CHECK:       d:
+; CHECK-NEXT:    [[I:%.*]] = phi i8* [ null, [[B:%.*]] ], [ [[E:%.*]], [[F:%.*]] ]
+; CHECK-NEXT:    br i1 undef, label [[F]], label [[G:%.*]]
+; CHECK:       g:
+; CHECK-NEXT:    store i8* [[I]], i8** null
+; CHECK-NEXT:    unreachable
+; CHECK:       f:
+; CHECK-NEXT:    [[E]] = getelementptr i8, i8* [[I]], i64 1
+; CHECK-NEXT:    br label [[D]]
+;
+b:
+  store i8* null, i8** null
+  br label %d
+
+d:                                                ; preds = %f, %b
+  %i = phi i8* [ null, %b ], [ %e, %f ]
+  br i1 undef, label %f, label %g
+
+g:                                                ; preds = %d
+  %h = phi i8* [ %i, %d ]
+  store i8* %h, i8** null
+  unreachable
+
+f:                                                ; preds = %d
+  %e = getelementptr i8, i8* %i, i64 1
+  br label %d
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/test/Transforms/SLPVectorizer/X86/arith-fp.ll
index 7eec13e535d4..e00ed849ee4b 100644
--- a/test/Transforms/SLPVectorizer/X86/arith-fp.ll
+++ b/test/Transforms/SLPVectorizer/X86/arith-fp.ll
@@ -10,7 +10,7 @@
 
 define <2 x double> @buildvector_add_2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @buildvector_add_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -30,7 +30,7 @@ define <2 x double> @buildvector_add_2f64(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @buildvector_sub_2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @buildvector_sub_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -50,7 +50,7 @@ define <2 x double> @buildvector_sub_2f64(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @buildvector_mul_2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @buildvector_mul_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -70,7 +70,7 @@ define <2 x double> @buildvector_mul_2f64(<2 x double> %a, <2 x double> %b) {
 
 define <2 x double> @buildvector_div_2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @buildvector_div_2f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <2 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <2 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <2 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -90,7 +90,7 @@ define <2 x double> @buildvector_div_2f64(<2 x double> %a, <2 x double> %b) {
 
 define <4 x float> @buildvector_add_4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @buildvector_add_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -122,7 +122,7 @@ define <4 x float> @buildvector_add_4f32(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @buildvector_sub_4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @buildvector_sub_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -154,7 +154,7 @@ define <4 x float> @buildvector_sub_4f32(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @buildvector_mul_4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @buildvector_mul_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -186,7 +186,7 @@ define <4 x float> @buildvector_mul_4f32(<4 x float> %a, <4 x float> %b) {
 
 define <4 x float> @buildvector_div_4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @buildvector_div_4f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
@@ -222,7 +222,7 @@ define <4 x float> @buildvector_div_4f32(<4 x float> %a, <4 x float> %b) {
 
 define <4 x double> @buildvector_add_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_add_4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -254,7 +254,7 @@ define <4 x double> @buildvector_add_4f64(<4 x double> %a, <4 x double> %b) {
 
 define <4 x double> @buildvector_sub_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_sub_4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -286,7 +286,7 @@ define <4 x double> @buildvector_sub_4f64(<4 x double> %a, <4 x double> %b) {
 
 define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_mul_4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -318,7 +318,7 @@ define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) {
 
 define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_div_4f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
@@ -350,7 +350,7 @@ define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) {
 
 define <8 x float> @buildvector_add_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_add_8f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -406,7 +406,7 @@ define <8 x float> @buildvector_add_8f32(<8 x float> %a, <8 x float> %b) {
 
 define <8 x float> @buildvector_sub_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_sub_8f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -462,7 +462,7 @@ define <8 x float> @buildvector_sub_8f32(<8 x float> %a, <8 x float> %b) {
 
 define <8 x float> @buildvector_mul_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_mul_8f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -518,7 +518,7 @@ define <8 x float> @buildvector_mul_8f32(<8 x float> %a, <8 x float> %b) {
 
 define <8 x float> @buildvector_div_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_div_8f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
@@ -578,7 +578,7 @@ define <8 x float> @buildvector_div_8f32(<8 x float> %a, <8 x float> %b) {
 
 define <8 x double> @buildvector_add_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_add_8f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -634,7 +634,7 @@ define <8 x double> @buildvector_add_8f64(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @buildvector_sub_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_sub_8f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -690,7 +690,7 @@ define <8 x double> @buildvector_sub_8f64(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_mul_8f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -746,7 +746,7 @@ define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) {
 
 define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_div_8f64(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
@@ -802,7 +802,7 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) {
 
 define <16 x float> @buildvector_add_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_add_16f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
@@ -906,7 +906,7 @@ define <16 x float> @buildvector_add_16f32(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @buildvector_sub_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_sub_16f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
@@ -1010,7 +1010,7 @@ define <16 x float> @buildvector_sub_16f32(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @buildvector_mul_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_mul_16f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
@@ -1114,7 +1114,7 @@ define <16 x float> @buildvector_mul_16f32(<16 x float> %a, <16 x float> %b) {
 
 define <16 x float> @buildvector_div_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_div_16f32(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
diff --git a/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll b/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
new file mode 100644
index 000000000000..4c8748e220fd
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reverse_extract_elements.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -instcombine -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
+
+define float @dotf(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @dotf(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = fmul fast <4 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+entry:
+  %vecext = extractelement <4 x float> %x, i32 0
+  %vecext1 = extractelement <4 x float> %y, i32 0
+  %mul = fmul fast float %vecext, %vecext1
+  %vecext.1 = extractelement <4 x float> %x, i32 1
+  %vecext1.1 = extractelement <4 x float> %y, i32 1
+  %mul.1 = fmul fast float %vecext.1, %vecext1.1
+  %add.1 = fadd fast float %mul.1, %mul
+  %vecext.2 = extractelement <4 x float> %x, i32 2
+  %vecext1.2 = extractelement <4 x float> %y, i32 2
+  %mul.2 = fmul fast float %vecext.2, %vecext1.2
+  %add.2 = fadd fast float %mul.2, %add.1
+  %vecext.3 = extractelement <4 x float> %x, i32 3
+  %vecext1.3 = extractelement <4 x float> %y, i32 3
+  %mul.3 = fmul fast float %vecext.3, %vecext1.3
+  %add.3 = fadd fast float %mul.3, %add.2
+  ret float %add.3
+}
+
+define double @dotd(<4 x double>* byval nocapture readonly align 32, <4 x double>* byval nocapture readonly align 32) {
+; CHECK-LABEL: @dotd(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[TMP0:%.*]], align 32
+; CHECK-NEXT:    [[Y:%.*]] = load <4 x double>, <4 x double>* [[TMP1:%.*]], align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+entry:
+  %x = load <4 x double>, <4 x double>* %0, align 32
+  %y = load <4 x double>, <4 x double>* %1, align 32
+  %vecext = extractelement <4 x double> %x, i32 0
+  %vecext1 = extractelement <4 x double> %y, i32 0
+  %mul = fmul fast double %vecext, %vecext1
+  %vecext.1 = extractelement <4 x double> %x, i32 1
+  %vecext1.1 = extractelement <4 x double> %y, i32 1
+  %mul.1 = fmul fast double %vecext.1, %vecext1.1
+  %add.1 = fadd fast double %mul.1, %mul
+  %vecext.2 = extractelement <4 x double> %x, i32 2
+  %vecext1.2 = extractelement <4 x double> %y, i32 2
+  %mul.2 = fmul fast double %vecext.2, %vecext1.2
+  %add.2 = fadd fast double %mul.2, %add.1
+  %vecext.3 = extractelement <4 x double> %x, i32 3
+  %vecext1.3 = extractelement <4 x double> %y, i32 3
+  %mul.3 = fmul fast double %vecext.3, %vecext1.3
+  %add.3 = fadd fast double %mul.3, %add.2
+  ret double %add.3
+}
+
+define float @dotfq(<4 x float>* nocapture readonly %x, <4 x float>* nocapture readonly %y) {
+; CHECK-LABEL: @dotfq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[Y:%.*]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    ret float [[TMP4]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %x, align 16
+  %1 = load <4 x float>, <4 x float>* %y, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %vecext1 = extractelement <4 x float> %1, i32 0
+  %mul = fmul fast float %vecext1, %vecext
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %vecext1.1 = extractelement <4 x float> %1, i32 1
+  %mul.1 = fmul fast float %vecext1.1, %vecext.1
+  %add.1 = fadd fast float %mul.1, %mul
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %vecext1.2 = extractelement <4 x float> %1, i32 2
+  %mul.2 = fmul fast float %vecext1.2, %vecext.2
+  %add.2 = fadd fast float %mul.2, %add.1
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %vecext1.3 = extractelement <4 x float> %1, i32 3
+  %mul.3 = fmul fast float %vecext1.3, %vecext.3
+  %add.3 = fadd fast float %mul.3, %add.2
+  ret float %add.3
+}
+
+define double @dotdq(<4 x double>* nocapture readonly %x, <4 x double>* nocapture readonly %y) {
+; CHECK-LABEL: @dotdq(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[X:%.*]], align 32
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[Y:%.*]], align 32
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <4 x double> [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x double> [[TMP3]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x double> [[BIN_RDX]], <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x double> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[BIN_RDX2]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+entry:
+  %0 = load <4 x double>, <4 x double>* %x, align 32
+  %1 = load <4 x double>, <4 x double>* %y, align 32
+  %vecext = extractelement <4 x double> %0, i32 0
+  %vecext1 = extractelement <4 x double> %1, i32 0
+  %mul = fmul fast double %vecext1, %vecext
+  %vecext.1 = extractelement <4 x double> %0, i32 1
+  %vecext1.1 = extractelement <4 x double> %1, i32 1
+  %mul.1 = fmul fast double %vecext1.1, %vecext.1
+  %add.1 = fadd fast double %mul.1, %mul
+  %vecext.2 = extractelement <4 x double> %0, i32 2
+  %vecext1.2 = extractelement <4 x double> %1, i32 2
+  %mul.2 = fmul fast double %vecext1.2, %vecext.2
+  %add.2 = fadd fast double %mul.2, %add.1
+  %vecext.3 = extractelement <4 x double> %0, i32 3
+  %vecext1.3 = extractelement <4 x double> %1, i32 3
+  %mul.3 = fmul fast double %vecext1.3, %vecext.3
+  %add.3 = fadd fast double %mul.3, %add.2
+  ret double %add.3
+}
diff --git a/test/Transforms/SROA/address-spaces.ll b/test/Transforms/SROA/address-spaces.ll
index 8fba30c2720f..a54a3afc79f9 100644
--- a/test/Transforms/SROA/address-spaces.ll
+++ b/test/Transforms/SROA/address-spaces.ll
@@ -101,3 +101,31 @@ entry:
    %ret = fadd float %f1, %f2
    ret float %ret
 }
+
+; Test load from and store to non-zero address space.
+define void @test_load_store_diff_addr_space([2 x float] addrspace(1)* %complex1, [2 x float] addrspace(1)* %complex2) {
+; CHECK-LABEL: @test_load_store_diff_addr_space
+; CHECK-NOT: alloca
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+; CHECK: store i32 %{{.*}}, i32 addrspace(1)*
+  %a = alloca i64
+  %a.cast = bitcast i64* %a to [2 x float]*
+  %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
+  %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1
+  %complex1.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex1, i32 0, i32 0
+  %p1 = bitcast float addrspace(1)* %complex1.gep to i64 addrspace(1)*
+  %v1 = load i64, i64 addrspace(1)* %p1
+  store i64 %v1, i64* %a
+  %f1 = load float, float* %a.gep1
+  %f2 = load float, float* %a.gep2
+  %sum = fadd float %f1, %f2
+  store float %sum, float* %a.gep1
+  store float %sum, float* %a.gep2
+  %v2 = load i64, i64* %a
+  %complex2.gep = getelementptr [2 x float], [2 x float] addrspace(1)* %complex2, i32 0, i32 0
+  %p2 = bitcast float addrspace(1)* %complex2.gep to i64 addrspace(1)*
+  store i64 %v2, i64 addrspace(1)* %p2
+  ret void
+}
diff --git a/test/Transforms/SampleProfile/Inputs/indirect-call.prof b/test/Transforms/SampleProfile/Inputs/indirect-call.prof
index ff7be5df977a..ff1368142a0d 100644
--- a/test/Transforms/SampleProfile/Inputs/indirect-call.prof
+++ b/test/Transforms/SampleProfile/Inputs/indirect-call.prof
@@ -17,3 +17,6 @@ test_inline_strip:3000:0
 test_inline_strip_conflict:3000:0
  1: foo_inline_strip_conflict:3000
   1: 3000
+test_norecursive_inline:3000:0
+ 1: test_norecursive_inline:3000
+  20: 3000
diff --git a/test/Transforms/SampleProfile/indirect-call.ll b/test/Transforms/SampleProfile/indirect-call.ll
index 4101f6f492e5..bee98f1066d2 100644
--- a/test/Transforms/SampleProfile/indirect-call.ll
+++ b/test/Transforms/SampleProfile/indirect-call.ll
@@ -69,7 +69,18 @@ define void @test_noinline(void ()*) !dbg !12 {
   ret void
 }
 
+; CHECK-LABEL: @test_norecursive_inline
+; If the indirect call target is the caller, we should not promote it.
+define void @test_norecursive_inline() !dbg !24 {
+; CHECK-NOT: icmp
+; CHECK: call
+  %1 = load void ()*, void ()** @y, align 8
+  call void %1(), !dbg !25
+  ret void
+}
+
 @x = global i32 0, align 4
+@y = global void ()* null, align 8
 
 define i32* @foo_inline1(i32* %x) !dbg !14 {
   ret i32* %x
@@ -142,3 +153,5 @@ define void @test_direct() !dbg !22 {
 !21 = distinct !DISubprogram(name: "foo_direct", scope: !1, file: !1, line: 21, unit: !0)
 !22 = distinct !DISubprogram(name: "test_direct", scope: !1, file: !1, line: 22, unit: !0)
 !23 = !DILocation(line: 23, scope: !22)
+!24 = distinct !DISubprogram(name: "test_norecursive_inline", scope: !1, file: !1, line: 12, unit: !0)
+!25 = !DILocation(line: 13, scope: !24)
diff --git a/test/Transforms/Sink/badloadsink.ll b/test/Transforms/Sink/badloadsink.ll
new file mode 100644
index 000000000000..e3f4884c5a40
--- /dev/null
+++ b/test/Transforms/Sink/badloadsink.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -basicaa -sink -S | FileCheck %s
+declare void @foo(i64 *)
+define i64 @sinkload(i1 %cmp) {
+; CHECK-LABEL: @sinkload
+top:
+    %a = alloca i64
+; CHECK: call void @foo(i64* %a)
+; CHECK-NEXT: %x = load i64, i64* %a
+    call void @foo(i64* %a)
+    %x = load i64, i64* %a
+    br i1 %cmp, label %A, label %B
+A:
+    store i64 0, i64 *%a
+    br label %B
+B:
+; CHECK-NOT: load i64, i64* %a
+    ret i64 %x
+}
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split.ll b/test/Transforms/ThinLTOBitcodeWriter/split.ll
index d37d10bd3560..8bf3a18cd7f9 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split.ll
@@ -25,6 +25,9 @@
 ; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 2 module(s)
 
 ; BCA0: <GLOBALVAL_SUMMARY_BLOCK
+; BCA1: <FULL_LTO_GLOBALVAL_SUMMARY_BLOCK
+; 16 = not eligible to import
+; BCA1: <PERMODULE_GLOBALVAR_INIT_REFS {{.*}} op1=16
 ; BCA1-NOT: <GLOBALVAL_SUMMARY_BLOCK
 
 $g = comdat any
@@ -47,5 +50,6 @@ define i8* @f() {
 ; NODEBUG-NOT: !llvm.dbg.cu
 !llvm.dbg.cu = !{}
 
+; M1: !{i32 1, !"ThinLTO", i32 0}
 !1 = !{i32 2, !"Debug Info Version", i32 3}
 !llvm.module.flags = !{!1}
diff --git a/test/Transforms/Util/PredicateInfo/condprop2.ll b/test/Transforms/Util/PredicateInfo/condprop2.ll
index 415fa7c879e3..facd22f5b7a6 100644
--- a/test/Transforms/Util/PredicateInfo/condprop2.ll
+++ b/test/Transforms/Util/PredicateInfo/condprop2.ll
@@ -1,4 +1,4 @@
-; REQUIRES: asserts
+; REQUIRES: abi-breaking-checks
 ; NOTE: The flag -reverse-iterate is present only in a +Asserts build.
 ; Hence, this test has been split from condprop.ll to test with -reverse-iterate.
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
diff --git a/test/Transforms/Util/PredicateInfo/testandor2.ll b/test/Transforms/Util/PredicateInfo/testandor2.ll
index a03250c2f7a0..a1b9c62040c8 100644
--- a/test/Transforms/Util/PredicateInfo/testandor2.ll
+++ b/test/Transforms/Util/PredicateInfo/testandor2.ll
@@ -1,4 +1,4 @@
-; REQUIRES: asserts
+; REQUIRES: abi-breaking-checks
 ; NOTE: The flag -reverse-iterate is present only in a +Asserts build.
 ; Hence, this test has been split from testandor.ll to test with -reverse-iterate.
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
diff --git a/test/lit.cfg b/test/lit.cfg
index e9916b2a60e8..5e903c26657e 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -546,3 +546,6 @@ llvm_config_cmd.wait()
 
 if config.have_libxar:
     config.available_features.add('xar')
+
+if config.enable_abi_breaking_checks == "1":
+    config.available_features.add('abi-breaking-checks')
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index b6a8b8b17bca..f95f6d8ec9ac 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -24,6 +24,7 @@ config.include_go_tests = @LLVM_INCLUDE_GO_TESTS@
 config.go_executable = "@GO_EXECUTABLE@"
 config.enable_shared = @ENABLE_SHARED@
 config.enable_assertions = @ENABLE_ASSERTIONS@
+config.enable_abi_breaking_checks = "@LLVM_ENABLE_ABI_BREAKING_CHECKS@"
 config.targets_to_build = "@TARGETS_TO_BUILD@"
 config.native_target = "@LLVM_NATIVE_ARCH@"
 config.llvm_bindings = "@LLVM_BINDINGS@".split(' ')
diff --git a/test/tools/llvm-cvtres/Inputs/test_resource.obj.coff b/test/tools/llvm-cvtres/Inputs/test_resource.obj.coff
new file mode 100644
index 0000000000000000000000000000000000000000..e5a7cbd0119659237c6719f0a616fb94bd1d099c
GIT binary patch
literal 3468
zcmd5<YfKbZ6g~qeh}A-WG&bpvi7^HfyIBwsP1~Sot0`8pfR$v^y6z4#5q8MV3|$f*
z_z1<M8jTW33YJD1g@lr{0kjlT6EvhWDg9y4Y@(H-5W^#-q+m(eerJZq?iO}!|J1Yh
z?mg!_-#Pcry)$#~WC_u9v15IH;Y&mbWQbsKNeA?UYjRAJs7N6Cbk>+@Gbmx3#iM$h
zYplt$u5(dDli*gym`EI&43rsO${a72=-v!x7GoYmzoQxnGq>3DRLP^-CFcRjd8pXo
zcH6frPQ6%iYpPw7RP7^=q$;|{DOsR!o^jMws2Bnz2+M!af4umi?X|DtS65y6dBjKb
z8}Jx7Q%OjKO8=${h2Eey6XS4&(Hv^XN{Xj;2hncX=}}arKogT}>%`2xxwaiSR7-Cq
zlJjF%_+CizO3cXiC<i@`V(}fhNK(ZlZ+hA~`+VBHvZ_0Z#2ig`$%;M2n!L%LsT3FM
zZdog{@6EL5DoT-R-<<<BIn|PEwWMrh&H>qN->InDzO+=k7u8v+VWuqHWsnFIL;Its
zJe{Z<7f9J?c`ZuNE<t+^?J~62(4v_Wo@X!Ox<YWyu@?9*z*EsqhWG`nP%iKUfy(!z
z(<HPAnG??YHvAd?zy-nzl<xzM+m%W}{JiqZFz`xF5F{pEy$F77AsiiCUjUE6P2IHs
z&i&q71Rq=kpPa!r2aOf;F@K90o>1Ew!ncIjMIkmn6M+(FucY0SPg^Myza5l?YJ-(a
zd(h_4QlNQ;#&!(Q_u$RPeV+?v4*s{(E+AR(yh}@HHBgRTP0J0h4c_<R*@|e+%aPol
z&4}J(<mP^fc#ime5lg&Ee7D6CuS>k4@r*AwG@fH#`&i>mv%=7>16yfmmC#-`v_w3o
z1DMAZ7-1IH<^;V$-@%&%Z^BF!jJE|0@H&xz>#X?y4>@-1*x|#6y<V@b>tXu8bm@}m
zJ$dqEV`HPw=PN5K3lmaPQ&U}CjgZR9N|4*y+KwDK5+;9LT^&L$T)5EG)HF6WcIwor
z<HwJO;n&yK_xASI*4Fm)^o)#*oIij5%$YMLAAv`Y9u0|YXlOv%$B!TP_4QR%Rhj(L
zr%zW@RD>{1(;UX5M~@ypd^j*L(BI$R-rj!p?AZm$09#sGkbiJ+5ag#%pWeB1=kn#t
zi;;ofxN&2Ae0*qVXn1(IySw}9)vIC1aCLNaOiWCSj*jAbo12@%li|H}>lXNT@7`@~
zZH-6<p|i8|{{8zmZ{B<!GB{mbUAJ%FeoiuIxW<U&7y3DgIQsuasQa}+_-0i|ar7vQ
zWdoCXH@xoK<$Kom*=RF#6oBFQ5Ayz)e&+jgVA?-jG5Qrf$cw;#QtF!;q9>&vjD9)g
z|0{2#J_3KZnr6!<6F*J+{So-%-%>!Snd*<gpYl;~3B&)q?;5-zCCq#-Hg7L7ZtfQ@
zDkI%cB~q!798`qL$DRT@1WkdTZI<uL`9BIfTRe5qdhAJr9h2iUXaYrLGs4~{*wqB`
z`%hpOR>%#%jDH>6Elhz326r1(&==V0qQEVuwZ<-O`iT;0AJxNHK|9Iv>ij-v`dt{G
zY>b#~9)me@7c3sT3%xmU1w>k*tcoZOQOBuVbV&uWTXKoBM{trg(W$sKhwN5GoYl)j
zRrAR1gT}d>b7we};u4GOYIpcCO5Soa3M7YSv0C{kZX$u-9Xu`30)Ke~OW-#LpEIKc
z{_5b9!@LB3Z}3qH9DH#E%Nm>25Jn9SnMiUvpN;~M4XM@)m;p#yT6*EqU}ST8buhBY
XnjVKf1EMyiSj7-zqcwXD;c@&8gF0(?

literal 0
HcmV?d00001

diff --git a/test/tools/llvm-cvtres/object.test b/test/tools/llvm-cvtres/object.test
new file mode 100644
index 000000000000..8117ecc910c9
--- /dev/null
+++ b/test/tools/llvm-cvtres/object.test
@@ -0,0 +1,229 @@
+// Check COFF emission of cvtres
+// The input was generated with the following command, using the original Windows
+// rc.exe:
+// > rc /fo test_resource.res /nologo test_resource.rc
+// The object file we are comparing against was generated with this command using
+// the original cvtres.
+// > cvtres /machine:X86 /readonly /nologo /out:test_resource.o test_resource.res
+
+RUN: llvm-cvtres /out:%t %p/Inputs/test_resource.res
+RUN: llvm-readobj -coff-resources -section-data %t | FileCheck %s
+
+CHECK:      Resources [
+CHECK-NEXT:   String Name Entries: 1
+CHECK-NEXT:   ID Entries: 4
+CHECK-NEXT:   Type: STRINGARRAY [
+CHECK-NEXT:     String Name Entries: 1
+CHECK-NEXT:     ID Entries: 0
+CHECK-NEXT:     Name: MYRESOURCE [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 1033) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:   ]
+CHECK-NEXT:   Type: kRT_BITMAP (ID 2) [
+CHECK-NEXT:     String Name Entries: 2
+CHECK-NEXT:     ID Entries: 0
+CHECK-NEXT:     Name: CURSOR [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 1033) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:     Name: OKAY [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 1033) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:   ]
+CHECK-NEXT:   Type: kRT_MENU (ID 4) [
+CHECK-NEXT:     String Name Entries: 1
+CHECK-NEXT:     ID Entries: 1
+CHECK-NEXT:     Name: "EAT" [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 3081) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:     Name: (ID 14432) [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 2052) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:   ]
+CHECK-NEXT:   Type: kRT_DIALOG (ID 5) [
+CHECK-NEXT:     String Name Entries: 1
+CHECK-NEXT:     ID Entries: 0
+CHECK-NEXT:     Name: TESTDIALOG [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 1033) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:   ]
+CHECK-NEXT:   Type: kRT_ACCELERATOR (ID 9) [
+CHECK-NEXT:     String Name Entries: 1
+CHECK-NEXT:     ID Entries: 1
+CHECK-NEXT:     Name: MYACCELERATORS [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 1033) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:     Name: (ID 12) [
+CHECK-NEXT:       String Name Entries: 0
+CHECK-NEXT:       ID Entries: 1
+CHECK-NEXT:       Language: (ID 1033) [
+CHECK-NEXT:         Time/Date Stamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:         Major Version: 0
+CHECK-NEXT:         Minor Version: 0
+CHECK-NEXT:       ]
+CHECK-NEXT:     ]
+CHECK-NEXT:   ]
+CHECK-DAG:   .rsrc$02 Data (
+CHECK-NEXT:    0000: 11000300 E7030000 0D004400 4C040000  |..........D.L...|
+CHECK-NEXT:    0010: 82001200 BC010000 28000000 10000000  |........(.......|
+CHECK-NEXT:    0020: 10000000 01001800 00000000 00030000  |................|
+CHECK-NEXT:    0030: C40E0000 C40E0000 00000000 00000000  |................|
+CHECK-NEXT:    0040: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0050: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0060: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0070: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0080: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0090: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    00A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    00B0: FFFFFFFF FF7F7F7F 7C7C7C78 78787575  |........|||xxxuu|
+CHECK-NEXT:    00C0: 75FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |u...............|
+CHECK-NEXT:    00D0: FFFFFFFF FFFFFFFF FFFFFFFF 979797FF  |................|
+CHECK-NEXT:    00E0: FFFFFFFF FF838383 AAAAAADB DBDB7979  |..............yy|
+CHECK-NEXT:    00F0: 79757575 FFFFFFFF FFFFFFFF FFFFFFFF  |yuuu............|
+CHECK-NEXT:    0100: FFFFFFFF FFFFFFFF FFFFFFFF 9C9C9C98  |................|
+CHECK-NEXT:    0110: 9898FFFF FF888888 DBDBDBB7 B7B77D7D  |..............}}|
+CHECK-NEXT:    0120: 7DFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |}...............|
+CHECK-NEXT:    0130: FFFFFFFF FFFFFFFF FFFFFFFF A0A0A09C  |................|
+CHECK-NEXT:    0140: 9C9C9393 93ADADAD F2F2F284 84848181  |................|
+CHECK-NEXT:    0150: 81FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0160: FFFFFFFF FFFFFFFF FFFFFFFF A4A4A4D7  |................|
+CHECK-NEXT:    0170: D7D79D9D 9DD0D0D0 EEEEEE91 91918D8D  |................|
+CHECK-NEXT:    0180: 8DFFFFFF FFFFFF81 81817E7E 7EFFFFFF  |..........~~~...|
+CHECK-NEXT:    0190: FFFFFFFF FFFFFFFF FFFFFFFF A9A9A9F2  |................|
+CHECK-NEXT:    01A0: F2F2E5E5 E5E2E2E2 95959591 91918D8D  |................|
+CHECK-NEXT:    01B0: 8D898989 868686FF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    01C0: FFFFFFFF FFFFFFFF FFFFFFFF ADADADF2  |................|
+CHECK-NEXT:    01D0: F2F2E1E1 E1DFDFDF E7E7E7E4 E4E4BBBB  |................|
+CHECK-NEXT:    01E0: BB8E8E8E FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    01F0: FFFFFFFF FFFFFFFF FFFFFFFF B5B5B5F2  |................|
+CHECK-NEXT:    0200: F2F2E8E8 E8E7E7E7 EAEAEAC6 C6C69E9E  |................|
+CHECK-NEXT:    0210: 9EFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0220: FFFFFFFF FFFFFFFF FFFFFFFF B9B9B9F4  |................|
+CHECK-NEXT:    0230: F4F4ECEC ECEDEDED CBCBCBA7 A7A7FFFF  |................|
+CHECK-NEXT:    0240: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0250: FFFFFFFF FFFFFFFF FFFFFFFF BDBDBDF7  |................|
+CHECK-NEXT:    0260: F7F7EFEF EFD0D0D0 AFAFAFFF FFFFFFFF  |................|
+CHECK-NEXT:    0270: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0280: FFFFFFFF FFFFFFFF FFFFFFFF C1C1C1F7  |................|
+CHECK-NEXT:    0290: F7F7D5D5 D5B6B6B6 FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    02A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    02B0: FFFFFFFF FFFFFFFF FFFFFFFF C4C4C4D9  |................|
+CHECK-NEXT:    02C0: D9D9BEBE BEFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    02D0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    02E0: FFFFFFFF FFFFFFFF FFFFFFFF C8C8C8C5  |................|
+CHECK-NEXT:    02F0: C5C5FFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0300: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0310: FFFFFFFF FFFFFFFF FFFFFFFF CBCBCBFF  |................|
+CHECK-NEXT:    0320: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0330: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0340: 28000000 10000000 10000000 01001800  |(...............|
+CHECK-NEXT:    0350: 00000000 00030000 C40E0000 C40E0000  |................|
+CHECK-NEXT:    0360: 00000000 00000000 FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0370: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0380: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0390: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    03A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    03B0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    03C0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    03D0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    03E0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    03F0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0400: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0410: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0420: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0430: FFFFFFFF A0E3A901 B31801B3 1801B318  |................|
+CHECK-NEXT:    0440: 01B31801 B31801B3 1861D06F FFFFFFFF  |.........a.o....|
+CHECK-NEXT:    0450: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0460: FFFFFFFF 01B31800 D7331CDB 49DBF9E2  |.........3..I...|
+CHECK-NEXT:    0470: 9BEFAF00 D73300D7 3301B318 FFFFFFFF  |.....3..3.......|
+CHECK-NEXT:    0480: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0490: FFFFFFFF 01B31800 DE55F6FE F9DBFAE7  |.........U......|
+CHECK-NEXT:    04A0: FEFFFE86 EFAE00DE 5501B318 FFFFFFFF  |........U.......|
+CHECK-NEXT:    04B0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    04C0: FFFFFFFF 01B31800 E676DBFB EC00E676  |.........v.....v|
+CHECK-NEXT:    04D0: 57EFA5FB FFFD55EE A401B318 FFFFFFFF  |W.....U.........|
+CHECK-NEXT:    04E0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    04F0: FFFFFFFF 01B31800 ED9800ED 9800ED98  |................|
+CHECK-NEXT:    0500: 00ED9887 F7CFFEFF FF01B318 FFFFFFFF  |................|
+CHECK-NEXT:    0510: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0520: FFFFFFFF 01B31800 F4BA00F4 BA00F4BA  |................|
+CHECK-NEXT:    0530: 00F4BA00 F4BA9CFB E401B318 FFFFFFFF  |................|
+CHECK-NEXT:    0540: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0550: FFFFFFFF 01B31800 FBDB00FB DB00FBDB  |................|
+CHECK-NEXT:    0560: 00FBDB00 FBDB00FB DB01B318 FFFFFFFF  |................|
+CHECK-NEXT:    0570: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0580: FFFFFFFF 9FE2A801 B31801B3 1801B318  |................|
+CHECK-NEXT:    0590: 01B31801 B31801B3 1861D06F FFFFFFFF  |.........a.o....|
+CHECK-NEXT:    05A0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    05B0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    05C0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    05D0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    05E0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    05F0: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0600: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0610: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0620: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0630: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0640: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0650: FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF  |................|
+CHECK-NEXT:    0660: FFFFFFFF FFFFFFFF 00000000 00006400  |..............d.|
+CHECK-NEXT:    0670: 79007500 00000000 65007300 68006100  |y.u.....e.s.h.a.|
+CHECK-NEXT:    0680: 6C006100 00008000 66006B00 61006F00  |l.a.....f.k.a.o.|
+CHECK-NEXT:    0690: 79006100 00000000 0000C080 00000000  |y.a.............|
+CHECK-NEXT:    06A0: 02000A00 0A00C800 2C010000 00005400  |........,.....T.|
+CHECK-NEXT:    06B0: 65007300 74000000 01000250 00000000  |e.s.t......P....|
+CHECK-NEXT:    06C0: 0A000A00 E6000E00 0100FFFF 82004300  |..............C.|
+CHECK-NEXT:    06D0: 6F006E00 74006900 6E007500 65003A00  |o.n.t.i.n.u.e.:.|
+CHECK-NEXT:    06E0: 00000000 00000150 00000000 42008600  |.......P....B...|
+CHECK-NEXT:    06F0: A1000D00 0200FFFF 80002600 4F004B00  |..........&.O.K.|
+CHECK-NEXT:    0700: 00000000 00000000 11005800 A4000000  |..........X.....|
+CHECK-NEXT:    0710: 0D004800 2E160000 82001200 BC010000  |..H.............|
+CHECK-NEXT:    0720: 00000000 00006400 66006900 73006800  |......d.f.i.s.h.|
+CHECK-NEXT:    0730: 00000000 65007300 61006C00 61006400  |....e.s.a.l.a.d.|
+CHECK-NEXT:    0740: 00008000 66006400 75006300 6B000000  |....f.d.u.c.k...|
+CHECK-NEXT:    0750: 74686973 20697320 61207573 65722064  |this is a user d|
+CHECK-NEXT:    0760: 6566696E 65642072 65736F75 72636500  |efined resource.|
+CHECK-NEXT:    0770: 69742063 6F6E7461 696E7320 6D616E79  |it contains many|
+CHECK-NEXT:    0780: 20737472 696E6773 00000000 00000000  | strings........|
+CHECK-NEXT:  )
diff --git a/test/tools/llvm-cvtres/resource.test b/test/tools/llvm-cvtres/parse.test
similarity index 93%
rename from test/tools/llvm-cvtres/resource.test
rename to test/tools/llvm-cvtres/parse.test
index b9be74bf671b..23911ada82e8 100644
--- a/test/tools/llvm-cvtres/resource.test
+++ b/test/tools/llvm-cvtres/parse.test
@@ -2,7 +2,7 @@
 // rc.exe:
 // > rc /fo test_resource.res /nologo test_resource.rc
 
-RUN: llvm-cvtres %p/Inputs/test_resource.res | FileCheck %s
+RUN: llvm-cvtres /verbose /out:%t %p/Inputs/test_resource.res | FileCheck %s
 
 CHECK:      Number of resources: 8
 CHECK-NEXT: Resource Tree [
diff --git a/test/tools/llvm-dwarfdump/X86/brief.s b/test/tools/llvm-dwarfdump/X86/brief.s
new file mode 100644
index 000000000000..82c499de8c7f
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/brief.s
@@ -0,0 +1,131 @@
+# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o - \
+# RUN: | llvm-dwarfdump -debug-dump=info -brief - \
+# RUN: | FileCheck %s
+
+# CHECK: DW_TAG_compile_unit
+# CHECK-NOT: DW_FORM
+# CHECK: DW_AT
+
+# This test is meant to verify that --brief hides DW_FORMs
+# and abbreviation codes from .debug_info section.
+
+
+	.section	__TEXT,__text,regular,pure_instructions
+	.section	__DWARF,__debug_str,regular,debug
+Linfo_string:
+	.asciz	"basic.c"               ## string offset=42
+	.section	__DWARF,__debug_loc,regular,debug
+Lsection_debug_loc:
+	.section	__DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+	.byte	1                       ## Abbreviation Code
+	.byte	17                      ## DW_TAG_compile_unit
+	.byte	0                       ## DW_CHILDREN_no
+	.byte	37                      ## DW_AT_producer
+	.byte	14                      ## DW_FORM_strp
+	.byte	19                      ## DW_AT_language
+	.byte	5                       ## DW_FORM_data2
+	.byte	3                       ## DW_AT_name
+	.byte	14                      ## DW_FORM_strp
+	.byte	16                      ## DW_AT_stmt_list
+	.byte	23                      ## DW_FORM_sec_offset
+	.byte	27                      ## DW_AT_comp_dir
+	.byte	14                      ## DW_FORM_strp
+	.byte	0                       ## EOM(1)
+	.byte	0                       ## EOM(2)
+	.byte	0                       ## EOM(3)
+	.section	__DWARF,__debug_info,regular,debug
+Lsection_info:
+Lcu_begin0:
+	.long	26                      ## Length of Unit
+	.short	4                       ## DWARF version number
+Lset0 = Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
+	.long	Lset0
+	.byte	8                       ## Address Size (in bytes)
+	.byte	1                       ## Abbrev [1] 0xb:0x13 DW_TAG_compile_unit
+	.long	0                       ## DW_AT_producer
+	.short	12                      ## DW_AT_language
+	.long	42                      ## DW_AT_name
+Lset1 = Lline_table_start0-Lsection_line ## DW_AT_stmt_list
+	.long	Lset1
+	.long	50                      ## DW_AT_comp_dir
+	.section	__DWARF,__debug_ranges,regular,debug
+Ldebug_range:
+	.section	__DWARF,__debug_macinfo,regular,debug
+Ldebug_macinfo:
+Lcu_macro_begin0:
+	.byte	0                       ## End Of Macro List Mark
+	.section	__DWARF,__apple_names,regular,debug
+Lnames_begin:
+	.long	1212240712              ## Header Magic
+	.short	1                       ## Header Version
+	.short	0                       ## Header Hash Function
+	.long	1                       ## Header Bucket Count
+	.long	0                       ## Header Hash Count
+	.long	12                      ## Header Data Length
+	.long	0                       ## HeaderData Die Offset Base
+	.long	1                       ## HeaderData Atom Count
+	.short	1                       ## DW_ATOM_die_offset
+	.short	6                       ## DW_FORM_data4
+	.long	-1                      ## Bucket 0
+	.section	__DWARF,__apple_objc,regular,debug
+Lobjc_begin:
+	.long	1212240712              ## Header Magic
+	.short	1                       ## Header Version
+	.short	0                       ## Header Hash Function
+	.long	1                       ## Header Bucket Count
+	.long	0                       ## Header Hash Count
+	.long	12                      ## Header Data Length
+	.long	0                       ## HeaderData Die Offset Base
+	.long	1                       ## HeaderData Atom Count
+	.short	1                       ## DW_ATOM_die_offset
+	.short	6                       ## DW_FORM_data4
+	.long	-1                      ## Bucket 0
+	.section	__DWARF,__apple_namespac,regular,debug
+Lnamespac_begin:
+	.long	1212240712              ## Header Magic
+	.short	1                       ## Header Version
+	.short	0                       ## Header Hash Function
+	.long	1                       ## Header Bucket Count
+	.long	0                       ## Header Hash Count
+	.long	12                      ## Header Data Length
+	.long	0                       ## HeaderData Die Offset Base
+	.long	1                       ## HeaderData Atom Count
+	.short	1                       ## DW_ATOM_die_offset
+	.short	6                       ## DW_FORM_data4
+	.long	-1                      ## Bucket 0
+	.section	__DWARF,__apple_types,regular,debug
+Ltypes_begin:
+	.long	1212240712              ## Header Magic
+	.short	1                       ## Header Version
+	.short	0                       ## Header Hash Function
+	.long	1                       ## Header Bucket Count
+	.long	0                       ## Header Hash Count
+	.long	20                      ## Header Data Length
+	.long	0                       ## HeaderData Die Offset Base
+	.long	3                       ## HeaderData Atom Count
+	.short	1                       ## DW_ATOM_die_offset
+	.short	6                       ## DW_FORM_data4
+	.short	3                       ## DW_ATOM_die_tag
+	.short	5                       ## DW_FORM_data2
+	.short	4                       ## DW_ATOM_type_flags
+	.short	11                      ## DW_FORM_data1
+	.long	-1                      ## Bucket 0
+	.section	__DWARF,__apple_exttypes,regular,debug
+Lexttypes_begin:
+	.long	1212240712              ## Header Magic
+	.short	1                       ## Header Version
+	.short	0                       ## Header Hash Function
+	.long	1                       ## Header Bucket Count
+	.long	0                       ## Header Hash Count
+	.long	12                      ## Header Data Length
+	.long	0                       ## HeaderData Die Offset Base
+	.long	1                       ## HeaderData Atom Count
+	.short	7                       ## DW_ATOM_ext_types
+	.short	6                       ## DW_FORM_data4
+	.long	-1                      ## Bucket 0
+
+.subsections_via_symbols
+	.section	__DWARF,__debug_line,regular,debug
+Lsection_line:
+Lline_table_start0:
diff --git a/test/tools/llvm-dwarfdump/X86/lit.local.cfg b/test/tools/llvm-dwarfdump/X86/lit.local.cfg
new file mode 100644
index 000000000000..c8625f4d9d24
--- /dev/null
+++ b/test/tools/llvm-dwarfdump/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
diff --git a/test/tools/llvm-pdbdump/class-layout.test b/test/tools/llvm-pdbdump/class-layout.test
index 4a69c17db2ba..7a08194c5005 100644
--- a/test/tools/llvm-pdbdump/class-layout.test
+++ b/test/tools/llvm-pdbdump/class-layout.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -all -class-recurse-depth=1 \
+; RUN: llvm-pdbutil pretty -all -class-recurse-depth=1 \
 ; RUN:   %p/Inputs/ClassLayoutTest.pdb > %t
 ; RUN: FileCheck -input-file=%t %s -check-prefix=GLOBALS_TEST
 ; RUN: FileCheck -input-file=%t %s -check-prefix=MEMBERS_TEST
diff --git a/test/tools/llvm-pdbdump/complex-padding-graphical.test b/test/tools/llvm-pdbdump/complex-padding-graphical.test
index a23321fe0124..9373c1ec6c2f 100644
--- a/test/tools/llvm-pdbdump/complex-padding-graphical.test
+++ b/test/tools/llvm-pdbdump/complex-padding-graphical.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -classes -class-definitions=layout \
+; RUN: llvm-pdbutil pretty -classes -class-definitions=layout \
 ; RUN:     -include-types=Test %p/Inputs/ComplexPaddingTest.pdb > %t
 
 ; RUN: FileCheck -input-file=%t %s -check-prefix=DIRECT_VB_ONLY
diff --git a/test/tools/llvm-pdbdump/enum-layout.test b/test/tools/llvm-pdbdump/enum-layout.test
index df447c65bbae..5813321f000d 100644
--- a/test/tools/llvm-pdbdump/enum-layout.test
+++ b/test/tools/llvm-pdbdump/enum-layout.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -types %p/Inputs/ClassLayoutTest.pdb > %t
+; RUN: llvm-pdbutil pretty -types %p/Inputs/ClassLayoutTest.pdb > %t
 ; RUN: FileCheck -input-file=%t %s -check-prefix=GLOBAL_ENUM
 ; RUN: FileCheck -input-file=%t %s -check-prefix=MEMBER_ENUM
 
diff --git a/test/tools/llvm-pdbdump/load-address.test b/test/tools/llvm-pdbdump/load-address.test
index 5791637d3a74..4402790d71f4 100644
--- a/test/tools/llvm-pdbdump/load-address.test
+++ b/test/tools/llvm-pdbdump/load-address.test
@@ -1,6 +1,6 @@
-; RUN: llvm-pdbdump pretty -externals %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil pretty -externals %p/Inputs/LoadAddressTest.pdb \
 ; RUN:    | FileCheck --check-prefix=RVA %s
-; RUN: llvm-pdbdump pretty -externals -load-address=0x40000000 \
+; RUN: llvm-pdbutil pretty -externals -load-address=0x40000000 \
 ; RUN: %p/Inputs/LoadAddressTest.pdb | FileCheck --check-prefix=VA %s
 
 ; RVA: ---EXTERNALS---
diff --git a/test/tools/llvm-pdbdump/raw-stream-data.test b/test/tools/llvm-pdbdump/raw-stream-data.test
index d55980632d41..1d7c23fe3672 100644
--- a/test/tools/llvm-pdbdump/raw-stream-data.test
+++ b/test/tools/llvm-pdbdump/raw-stream-data.test
@@ -1,8 +1,8 @@
-; RUN: llvm-pdbdump raw -stream-data=8 %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil raw -stream-data=8 %p/Inputs/LoadAddressTest.pdb \
 ; RUN:   | FileCheck %s -check-prefix=FULL_STREAM
-; RUN: llvm-pdbdump raw -stream-data=8:4 %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil raw -stream-data=8:4 %p/Inputs/LoadAddressTest.pdb \
 ; RUN:   | FileCheck %s -check-prefix=OFFSET_STREAM
-; RUN: llvm-pdbdump raw -stream-data=8:4@24 %p/Inputs/LoadAddressTest.pdb \
+; RUN: llvm-pdbutil raw -stream-data=8:4@24 %p/Inputs/LoadAddressTest.pdb \
 ; RUN:   | FileCheck %s -check-prefix=OFFSET_AND_LENGTH
 
 FULL_STREAM:      Stream Data {
diff --git a/test/tools/llvm-pdbdump/regex-filter.test b/test/tools/llvm-pdbdump/regex-filter.test
index 36c3da33e2e4..fb8ca7fa1892 100644
--- a/test/tools/llvm-pdbdump/regex-filter.test
+++ b/test/tools/llvm-pdbdump/regex-filter.test
@@ -1,25 +1,25 @@
-; RUN: llvm-pdbdump pretty -module-syms -globals -types %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -module-syms -globals -types %p/Inputs/FilterTest.pdb \
 ; RUN:    | FileCheck --check-prefix=NO_FILTER %s
 
-; RUN: llvm-pdbdump pretty -types -exclude-types="GlobalTypedef|NestedTypedef" \
+; RUN: llvm-pdbutil pretty -types -exclude-types="GlobalTypedef|NestedTypedef" \
 ; RUN:    %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_TYPEDEFS %s
-; RUN: llvm-pdbdump pretty -classes -enums %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -classes -enums %p/Inputs/FilterTest.pdb \
 ; RUN:    | FileCheck --check-prefix=EXCLUDE_TYPEDEFS %s
 
-; RUN: llvm-pdbdump pretty -types -exclude-types="GlobalEnum|NestedEnum" \
+; RUN: llvm-pdbutil pretty -types -exclude-types="GlobalEnum|NestedEnum" \
 ; RUN:    %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_ENUMS %s
-; RUN: llvm-pdbdump pretty -classes -typedefs %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -classes -typedefs %p/Inputs/FilterTest.pdb \
 ; RUN:    | FileCheck --check-prefix=EXCLUDE_ENUMS %s
 
-; RUN: llvm-pdbdump pretty -types -module-syms -globals -exclude-symbols="MemberVar|GlobalVar" \
+; RUN: llvm-pdbutil pretty -types -module-syms -globals -exclude-symbols="MemberVar|GlobalVar" \
 ; RUN:    %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_VARS %s
-; RUN: llvm-pdbdump pretty -types -exclude-types="FilterTestClass" \
+; RUN: llvm-pdbutil pretty -types -exclude-types="FilterTestClass" \
 ; RUN:    %p/Inputs/FilterTest.pdb | FileCheck  --check-prefix=EXCLUDE_WHOLE_CLASS %s
-; RUN: llvm-pdbdump pretty -module-syms -globals -exclude-compilands="FilterTest.obj"  \
+; RUN: llvm-pdbutil pretty -module-syms -globals -exclude-compilands="FilterTest.obj"  \
 ; RUN:    %p/Inputs/FilterTest.pdb | FileCheck  --check-prefix=EXCLUDE_COMPILAND %s
-; RUN: llvm-pdbdump pretty -types -include-types="FilterTestClass" \
+; RUN: llvm-pdbutil pretty -types -include-types="FilterTestClass" \
 ; RUN:    %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_TYPES %s
-; RUN: llvm-pdbdump pretty -types -module-syms -globals -include-symbols="[[:<:]](IntGlobalVar|DoubleGlobalVar)[[:>:]]" \
+; RUN: llvm-pdbutil pretty -types -module-syms -globals -include-symbols="[[:<:]](IntGlobalVar|DoubleGlobalVar)[[:>:]]" \
 ; RUN:    %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_VARS %s
 
 ; NO_FILTER: ---TYPES---
diff --git a/test/tools/llvm-pdbdump/simple-padding-graphical.test b/test/tools/llvm-pdbdump/simple-padding-graphical.test
index 0e19f9cc7018..91da534ca010 100644
--- a/test/tools/llvm-pdbdump/simple-padding-graphical.test
+++ b/test/tools/llvm-pdbdump/simple-padding-graphical.test
@@ -1,4 +1,4 @@
-; RUN: llvm-pdbdump pretty -classes -class-definitions=layout \
+; RUN: llvm-pdbutil pretty -classes -class-definitions=layout \
 ; RUN:     -include-types=SimplePad %p/Inputs/SimplePaddingTest.pdb > %t
 
 ; RUN: FileCheck -input-file=%t %s -check-prefix=NO_PADDING
diff --git a/test/tools/llvm-pdbdump/symbol-filters.test b/test/tools/llvm-pdbdump/symbol-filters.test
index d12d2aa8be0f..80c24baf17ca 100644
--- a/test/tools/llvm-pdbdump/symbol-filters.test
+++ b/test/tools/llvm-pdbdump/symbol-filters.test
@@ -1,25 +1,25 @@
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=data %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=data %p/Inputs/FilterTest.pdb \
 ; RUN:    | FileCheck --check-prefix=ONLY_DATA %s
 
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=thunks %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=thunks %p/Inputs/FilterTest.pdb \
 ; RUN:    | FileCheck --check-prefix=ONLY_THUNKS %s
 
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs %p/Inputs/FilterTest.pdb \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs %p/Inputs/FilterTest.pdb \
 ; RUN:    | FileCheck --check-prefix=ONLY_FUNCS %s
 
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs -sym-types=data \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs -sym-types=data \
 ; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=TWO_TYPES %s
 
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=data \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=data \
 ; RUN: -symbol-order=name %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=NAME_SORT_DATA %s
 
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=data \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=data \
 ; RUN: -symbol-order=size %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=SIZE_SORT_DATA %s
 
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs \
 ; RUN: -symbol-order=name %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=NAME_SORT_FUNCS %s
 
-; RUN: llvm-pdbdump pretty -globals -module-syms -sym-types=funcs \
+; RUN: llvm-pdbutil pretty -globals -module-syms -sym-types=funcs \
 ; RUN: -symbol-order=size %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=SIZE_SORT_FUNCS %s
 
 ; ONLY_DATA-NOT: func
diff --git a/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri b/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri
deleted file mode 100755
index 9566ed5c0f1481484b014ec74f281531f0d2ac3f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 13208
zcmeI3&uSA<6vj`+(b}rC8$nQE7P^v5gM<)VjS&Aqg)S7_+0+CQm?R}LVgi!l&V>))
zWB3N`BlrL=d<=Jb&b{ZGj6)kr5$WPw=>5()ch0>hzggYeJBLqpo(8oVp#}7l#A^pW
zlIXzKh8iphwP~IE>)c;q|FVnu@x%*{`u0nwhF;b9CDu^vNIGtj>kGZ3E>x#+wdv4_
z@=@!C(w2Wr^4G*vzpD3i0Vu{LysFM`GAB1^IZxP7Lpp9%_0eU&<xMGH*4vG|U)4Py
zCMbBi8hH%wt*RZ(gW`F94}0g~BKfkm@Ok{QQ@j_6-{wDgzY-6bx|^SUX%geVXS=Vx
zZ@m#P0!F|H7y%<-1dM<YFak!v2>c%cy6xrto9@yOSh2j@`$d00fPJV$1GoEIj)!6B
zw!+KE1+r?%@$Y~Z-#1?*hxFx$f?!b#!UfU650B#0(FLm3N-+XPzz7%tBVYuKfDteP
zM!*Od0V8lO0^8|uzt!AMd-ZfUem|?v+O2x48E2FDp_@<kz-KrTDAvaRm7~aq<Zghp
z=aCb4UqGye`Wrr6tZ_~KTdF2Tzz7%tBVYuKfDtePM!*Od0V7}pjKDu25D&+Be|kXi
zc#`)i9(1z-#l87Bn~%KaQ;PHcEO%aaG<?TKH}6uM4YGWiclX(vkGy(+RaJ79uu+@>
z5Eb|B2mLgiy!|klroC19XC1@2g?<h|6lta^zAM0d@_f0ivs1pee%gI{<lx)`{w5o8
z_JQ@I$d5qvy1)OnfQN8@0`uJDn4E<mALk`doR7#FQakbyzsoVXgO7Zizd)<Mx9Z>5
zdDrNeEp8}SwRmrSbUCj616H~8f%Q-OjsfRGs2|HTIag|u-{4xKqUu7x4gMoGid+Om
rb)Hw{7~9p~-{BZ)FF*Q5^HC1o5$Xs39ZouF{m+_sqdY0w)%E`XtEE!O

diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803 b/test/tools/llvm-readobj/Inputs/trivial.obj.elf-amdhsa-gfx803
new file mode 100644
index 0000000000000000000000000000000000000000..421269cbd8b252b71a74e7c4e35edb5dbc516c91
GIT binary patch
literal 2208
zcmdT_O>Pr06m~+C(uge*f{hnN4L=!$rUV3=gcJg5NCTlMP_b|)&S;eRljDgfi&X46
z1P9;_<p4-rfJJXo-W!i6lW7E-BH&4mpTF<@I=1KNapy@smurw%a^!^M^ddE%6z7XZ
zq&CRPQp>sB|Da*7YFq^Ef?E8t@Gs;W@T;V>V1?dViWny)g+ift3X+2U88uIeU$L0e
zd||c<Wgn4G$0Sc?xqN;`Nbm!6kdMa&_{AP^ubR)dA{ceVuff2M5ih{0(0idVz}vlD
zW&0gK+&Q$ZR4Szb7Zcm}9#Ad&^v-QsrSD4HTzK4>%$X9U720JkpF`jz`9uOkX@_}M
z$gDZL6L7E9p{)fr$(Q6o-=?rqi2#UvFkj#PFpr3)_hSm29NS72o~8!O3Hi3|4Rxt)
z$3I}sqi{6hA_)*#y*dn%1kY+43E4Ew7a?<W07|#`h=q>o6$@z&hoT=alWQ}TBME?k
z7j|pi_T<g=m_N79p4!79_f+nk(P+Zu>!gIU4c0gReEv6oIse<==WqE#-V=V19J<&A
z&pZ=WpIa{Xq&j>0!0oXq9|_-+-O%l~doRJa9=q-NVr)z90y9udZE3*x#OnW`E&MQu
zb7iQWu%}G&<z!Ncz8v+EVHj|$O$Y8EovJHM?;2I3O3PA&-k8>^oA);AH{j+kmlc4o
zs>dcWDC6UTVo3g861?TaFnt&Ig&3YMxzX<KRwj~}W3?v6L<-3cU_NwlJ{P$KQScqh
z%IEld_%K80vNeor9I4NzqGH3O&$NTqVZIIn_a=IVQ+PQB*6;yNNoGkd0f(uoDH~Kf
saY1VtJT5EpLFzILjipV4GYu&=Yg+?2TRU26e_a>);n3fXD`*7r6T^okA^-pY

literal 0
HcmV?d00001

diff --git a/test/tools/llvm-readobj/amdgpu-elf-definitions.test b/test/tools/llvm-readobj/amdgpu-elf-definitions.test
new file mode 100644
index 000000000000..c30931242df6
--- /dev/null
+++ b/test/tools/llvm-readobj/amdgpu-elf-definitions.test
@@ -0,0 +1,11 @@
+RUN: llvm-readobj -file-headers -program-headers -sections -symbols %p/Inputs/trivial.obj.elf-amdhsa-gfx803 | FileCheck %s
+
+CHECK: Format: ELF64-amdgpu-hsacobj
+CHECK: Arch: amdgcn
+CHECK: ElfHeader {
+CHECK:   Ident {
+CHECK:     OS/ABI: AMDGPU_HSA (0x40)
+CHECK:     ABIVersion: 0
+CHECK:   }
+CHECK:   Machine: EM_AMDGPU (0xE0)
+CHECK: }
diff --git a/test/tools/llvm-readobj/amdgpu-elf-defs.test b/test/tools/llvm-readobj/amdgpu-elf-defs.test
deleted file mode 100644
index 9a576e8158f9..000000000000
--- a/test/tools/llvm-readobj/amdgpu-elf-defs.test
+++ /dev/null
@@ -1,28 +0,0 @@
-RUN: llvm-readobj  -program-headers -sections -symbols -file-headers \
-RUN: %p/Inputs/trivial.elf-amdhsa-kaveri | FileCheck %s
-
-CHECK: ElfHeader {
-CHECK:  Ident {
-CHECK: Class: 64-bit (0x2)
-CHECK: DataEncoding: LittleEndian (0x1)
-CHECK: Machine: EM_AMDGPU (0xE0)
-
-
-CHECK: Section {
-CHECK: Name: .text
-CHECK: Type: SHT_PROGBITS (0x1)
-CHECK: Flags [ (0xC00007
-CHECK: SHF_ALLOC (0x2)
-CHECK: SHF_AMDGPU_HSA_AGENT (0x800000)
-CHECK: SHF_AMDGPU_HSA_CODE (0x400000)
-CHECK: SHF_EXECINSTR (0x4)
-CHECK: SHF_WRITE (0x1)
-
-CHECK: Symbol {
-CHECK: Name: hello_world
-CHECK: Value: 0x0
-CHECK: Binding: Local (0x0)
-CHECK: Type: AMDGPU_HSA_KERNEL (0xA)
-
-CHECK: ProgramHeader {
-CHECK: Type: PT_AMDGPU_HSA_LOAD_CODE_AGENT (0x60000003)
diff --git a/test/tools/llvm-readobj/elf-sec-flags.test b/test/tools/llvm-readobj/elf-sec-flags.test
index 842ded3e9e59..8a977109238d 100644
--- a/test/tools/llvm-readobj/elf-sec-flags.test
+++ b/test/tools/llvm-readobj/elf-sec-flags.test
@@ -1,29 +1,6 @@
 # Check that llvm-readobj shows arch specific ELF section flags.
 
-# RUN: yaml2obj -docnum 1 %s > %t-amdgpu.o
-# RUN: llvm-readobj -s %t-amdgpu.o | FileCheck -check-prefix=AMD %s
-
-# AMD:      Flags [ (0x300000)
-# AMD-NEXT:   SHF_AMDGPU_HSA_GLOBAL (0x100000)
-# AMD-NEXT:   SHF_AMDGPU_HSA_READONLY (0x200000)
-# AMD-NEXT: ]
-
-# amdgpu.o
---- !ELF
-FileHeader:
-  Class:    ELFCLASS64
-  Data:     ELFDATA2LSB
-  OSABI:    ELFOSABI_GNU
-  Type:     ET_REL
-  Machine:  EM_AMDGPU
-  Flags:    []
-Sections:
-  - Name:   .amdgpu
-    Type:   SHT_PROGBITS
-    Flags:  [SHF_AMDGPU_HSA_GLOBAL, SHF_AMDGPU_HSA_READONLY]
-    Size:   4
-
-# RUN: yaml2obj -docnum 2 %s > %t-hex.o
+# RUN: yaml2obj -docnum 1 %s > %t-hex.o
 # RUN: llvm-readobj -s %t-hex.o | FileCheck -check-prefix=HEX %s
 
 # HEX:      Flags [ (0x10000000)
@@ -44,7 +21,7 @@ Sections:
     Flags:  [SHF_HEX_GPREL]
     Size:   4
 
-# RUN: yaml2obj -docnum 3 %s > %t-mips.o
+# RUN: yaml2obj -docnum 2 %s > %t-mips.o
 # RUN: llvm-readobj -s %t-mips.o | FileCheck -check-prefix=MIPS %s
 
 # MIPS:      Flags [ (0x38000000)
@@ -67,7 +44,7 @@ Sections:
     Flags:  [SHF_MIPS_GPREL, SHF_MIPS_MERGE, SHF_MIPS_NOSTRIP]
     Size:   4
 
-# RUN: yaml2obj -docnum 4 %s > %t-x86_64.o
+# RUN: yaml2obj -docnum 3 %s > %t-x86_64.o
 # RUN: llvm-readobj -s %t-x86_64.o | FileCheck -check-prefix=X86_64 %s
 
 # X86_64:      Flags [ (0x10000000)
diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt
index e3041a6d40d4..bcf58842eac3 100644
--- a/tools/LLVMBuild.txt
+++ b/tools/LLVMBuild.txt
@@ -40,7 +40,7 @@ subdirectories =
  llvm-modextract
  llvm-nm
  llvm-objdump
- llvm-pdbdump
+ llvm-pdbutil
  llvm-profdata
  llvm-rtdyld
  llvm-size
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index ae3a31adaea3..489e50b88101 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -202,10 +202,11 @@ bool BugDriver::runPasses(Module *Program,
   } else
     Args.push_back(tool.c_str());
 
-  Args.push_back("-o");
-  Args.push_back(OutputFilename.c_str());
   for (unsigned i = 0, e = OptArgs.size(); i != e; ++i)
     Args.push_back(OptArgs[i].c_str());
+  Args.push_back("-disable-symbolication");
+  Args.push_back("-o");
+  Args.push_back(OutputFilename.c_str());
   std::vector<std::string> pass_args;
   for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) {
     pass_args.push_back(std::string("-load"));
diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp
index f74d721e6149..88de2706544e 100644
--- a/tools/dsymutil/DwarfLinker.cpp
+++ b/tools/dsymutil/DwarfLinker.cpp
@@ -6,15 +6,15 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-#include "DebugMap.h"
 #include "BinaryHolder.h"
 #include "DebugMap.h"
-#include "dsymutil.h"
 #include "MachOUtils.h"
 #include "NonRelocatableStringpool.h"
+#include "dsymutil.h"
 #include "llvm/ADT/IntervalMap.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/Config/config.h"
@@ -23,8 +23,8 @@
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectFileInfo.h"
@@ -33,7 +33,6 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
 #include "llvm/Object/MachO.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index e10d112dcf90..e71c3c5bb705 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -61,6 +61,9 @@ using namespace llvm;
 static cl::opt<std::string>
 InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
 
+static cl::opt<std::string>
+InputLanguage("x", cl::desc("Input language ('ir' or 'mir')"));
+
 static cl::opt<std::string>
 OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
 
@@ -335,6 +338,12 @@ int main(int argc, char **argv) {
         llvm::make_unique<yaml::Output>(YamlFile->os()));
   }
 
+  if (InputLanguage != "" && InputLanguage != "ir" &&
+      InputLanguage != "mir") {
+    errs() << argv[0] << "Input language must be '', 'IR' or 'MIR'\n";
+    return 1;
+  }
+
   // Compile the module TimeCompilations times to give better compile time
   // metrics.
   for (unsigned I = TimeCompilations; I; --I)
@@ -398,10 +407,11 @@ static int compileModule(char **argv, LLVMContext &Context) {
 
   // If user just wants to list available options, skip module loading
   if (!SkipModule) {
-    if (StringRef(InputFilename).endswith_lower(".mir")) {
+    if (InputLanguage == "mir" ||
+        (InputLanguage == "" && StringRef(InputFilename).endswith(".mir"))) {
       MIR = createMIRParserFromFile(InputFilename, Err, Context);
       if (MIR)
-        M = MIR->parseLLVMModule();
+        M = MIR->parseIRModule();
     } else
       M = parseIRFile(InputFilename, Err, Context);
     if (!M) {
@@ -518,54 +528,67 @@ static int compileModule(char **argv, LLVMContext &Context) {
       OS = BOS.get();
     }
 
-    if (!RunPassNames->empty()) {
-      if (!StartAfter.empty() || !StopAfter.empty() || !StartBefore.empty() ||
-          !StopBefore.empty()) {
-        errs() << argv[0] << ": start-after and/or stop-after passes are "
-                             "redundant when run-pass is specified.\n";
-        return 1;
-      }
-      if (!MIR) {
-        errs() << argv[0] << ": run-pass needs a .mir input.\n";
-        return 1;
-      }
+    const char *argv0 = argv[0];
+    AnalysisID StartBeforeID = getPassID(argv0, "start-before", StartBefore);
+    AnalysisID StartAfterID = getPassID(argv0, "start-after", StartAfter);
+    AnalysisID StopAfterID = getPassID(argv0, "stop-after", StopAfter);
+    AnalysisID StopBeforeID = getPassID(argv0, "stop-before", StopBefore);
+    if (StartBeforeID && StartAfterID) {
+      errs() << argv0 << ": -start-before and -start-after specified!\n";
+      return 1;
+    }
+    if (StopBeforeID && StopAfterID) {
+      errs() << argv0 << ": -stop-before and -stop-after specified!\n";
+      return 1;
+    }
+
+    if (MIR) {
+      // Construct a custom pass pipeline that starts after instruction
+      // selection.
       LLVMTargetMachine &LLVMTM = static_cast<LLVMTargetMachine&>(*Target);
       TargetPassConfig &TPC = *LLVMTM.createPassConfig(PM);
+      TPC.setDisableVerify(NoVerify);
       PM.add(&TPC);
       MachineModuleInfo *MMI = new MachineModuleInfo(&LLVMTM);
-      MMI->setMachineFunctionInitializer(MIR.get());
+      if (MIR->parseMachineFunctions(*M, *MMI))
+        return 1;
       PM.add(MMI);
       TPC.printAndVerify("");
 
-      for (const std::string &RunPassName : *RunPassNames) {
-        if (addPass(PM, argv[0], RunPassName, TPC))
+      if (!RunPassNames->empty()) {
+        if (!StartAfter.empty() || !StopAfter.empty() || !StartBefore.empty() ||
+            !StopBefore.empty()) {
+          errs() << argv0 << ": start-after and/or stop-after passes are "
+                               "redundant when run-pass is specified.\n";
           return 1;
-      }
-      PM.add(createPrintMIRPass(*OS));
-    } else {
-      const char *argv0 = argv[0];
-      AnalysisID StartBeforeID = getPassID(argv0, "start-before", StartBefore);
-      AnalysisID StartAfterID = getPassID(argv0, "start-after", StartAfter);
-      AnalysisID StopAfterID = getPassID(argv0, "stop-after", StopAfter);
-      AnalysisID StopBeforeID = getPassID(argv0, "stop-before", StopBefore);
+        }
 
-      if (StartBeforeID && StartAfterID) {
-        errs() << argv[0] << ": -start-before and -start-after specified!\n";
-        return 1;
-      }
-      if (StopBeforeID && StopAfterID) {
-        errs() << argv[0] << ": -stop-before and -stop-after specified!\n";
-        return 1;
+        for (const std::string &RunPassName : *RunPassNames) {
+          if (addPass(PM, argv0, RunPassName, TPC))
+            return 1;
+        }
+      } else {
+        TPC.setStartStopPasses(StartBeforeID, StartAfterID, StopBeforeID,
+                               StopAfterID);
+        TPC.addISelPasses();
+        TPC.addMachinePasses();
       }
+      TPC.setInitialized();
 
-      // Ask the target to add backend passes as necessary.
-      if (Target->addPassesToEmitFile(PM, *OS, FileType, NoVerify,
-                                      StartBeforeID, StartAfterID, StopBeforeID,
-                                      StopAfterID, MIR.get())) {
-        errs() << argv[0] << ": target does not support generation of this"
+      if (!StopBefore.empty() || !StopAfter.empty() || !RunPassNames->empty()) {
+        PM.add(createPrintMIRPass(*OS));
+      } else if (LLVMTM.addAsmPrinter(PM, *OS, FileType, MMI->getContext())) {
+        errs() << argv0 << ": target does not support generation of this"
                << " file type!\n";
         return 1;
       }
+      PM.add(createFreeMachineFunctionPass());
+    } else if (Target->addPassesToEmitFile(PM, *OS, FileType, NoVerify,
+                                           StartBeforeID, StartAfterID,
+                                           StopBeforeID, StopAfterID)) {
+      errs() << argv0 << ": target does not support generation of this"
+        << " file type!\n";
+      return 1;
     }
 
     // Before executing passes, print the final values of the LLVM options.
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 676134ca2368..c8fa56d724bf 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -121,6 +121,8 @@ static const char *GetBlockName(unsigned BlockID,
   case bitc::USELIST_BLOCK_ID:             return "USELIST_BLOCK_ID";
   case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
                                            return "GLOBALVAL_SUMMARY_BLOCK";
+  case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
+                                      return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
   case bitc::MODULE_STRTAB_BLOCK_ID:       return "MODULE_STRTAB_BLOCK";
   case bitc::STRTAB_BLOCK_ID:              return "STRTAB_BLOCK";
   }
@@ -298,6 +300,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(MST_CODE, HASH)
     }
   case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
+  case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
     switch (CodeID) {
     default:
       return nullptr;
diff --git a/tools/llvm-cvtres/LLVMBuild.txt b/tools/llvm-cvtres/LLVMBuild.txt
index 73693bccb0ea..78c598b75e31 100644
--- a/tools/llvm-cvtres/LLVMBuild.txt
+++ b/tools/llvm-cvtres/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llvm-cvtres
 parent = Tools
-required_libraries = Option Support
+required_libraries = Object Option Support
diff --git a/tools/llvm-cvtres/llvm-cvtres.cpp b/tools/llvm-cvtres/llvm-cvtres.cpp
index 95a6623b44eb..eaba02c16f39 100644
--- a/tools/llvm-cvtres/llvm-cvtres.cpp
+++ b/tools/llvm-cvtres/llvm-cvtres.cpp
@@ -112,20 +112,23 @@ int main(int argc_, const char *argv_[]) {
     return 0;
   }
 
-  machine Machine;
+  bool Verbose = InputArgs.hasArg(OPT_VERBOSE);
+
+  Machine MachineType;
 
   if (InputArgs.hasArg(OPT_MACHINE)) {
     std::string MachineString = InputArgs.getLastArgValue(OPT_MACHINE).upper();
-    Machine = StringSwitch<machine>(MachineString)
-                  .Case("ARM", machine::ARM)
-                  .Case("X64", machine::X64)
-                  .Case("X86", machine::X86)
-                  .Default(machine::UNKNOWN);
-    if (Machine == machine::UNKNOWN)
+    MachineType = StringSwitch<Machine>(MachineString)
+                      .Case("ARM", Machine::ARM)
+                      .Case("X64", Machine::X64)
+                      .Case("X86", Machine::X86)
+                      .Default(Machine::UNKNOWN);
+    if (MachineType == Machine::UNKNOWN)
       reportError("Unsupported machine architecture");
   } else {
-    outs() << "Machine architecture not specified; assumed X64.\n";
-    Machine = machine::X64;
+    if (Verbose)
+      outs() << "Machine architecture not specified; assumed X64.\n";
+    MachineType = Machine::X64;
   }
 
   std::vector<std::string> InputFiles = InputArgs.getAllArgValues(OPT_INPUT);
@@ -139,20 +142,22 @@ int main(int argc_, const char *argv_[]) {
   if (InputArgs.hasArg(OPT_OUT)) {
     OutputFile = InputArgs.getLastArgValue(OPT_OUT);
   } else {
-    OutputFile = StringRef(InputFiles[0]);
+    OutputFile = llvm::sys::path::filename(StringRef(InputFiles[0]));
     llvm::sys::path::replace_extension(OutputFile, ".obj");
   }
 
-  outs() << "Machine: ";
-  switch (Machine) {
-  case machine::ARM:
-    outs() << "ARM\n";
-    break;
-  case machine::X86:
-    outs() << "X86\n";
-    break;
-  default:
-    outs() << "X64\n";
+  if (Verbose) {
+    outs() << "Machine: ";
+    switch (MachineType) {
+    case Machine::ARM:
+      outs() << "ARM\n";
+      break;
+    case Machine::X86:
+      outs() << "X86\n";
+      break;
+    default:
+      outs() << "X64\n";
+    }
   }
 
   WindowsResourceParser Parser;
@@ -169,22 +174,28 @@ int main(int argc_, const char *argv_[]) {
     if (!RF)
       reportError(File + ": unrecognized file format.\n");
 
-    int EntryNumber = 0;
-    Expected<ResourceEntryRef> EntryOrErr = RF->getHeadEntry();
-    if (!EntryOrErr)
-      error(EntryOrErr.takeError());
-    ResourceEntryRef Entry = EntryOrErr.get();
-    bool End = false;
-    while (!End) {
-      error(Entry.moveNext(End));
-      EntryNumber++;
+    if (Verbose) {
+      int EntryNumber = 0;
+      Expected<ResourceEntryRef> EntryOrErr = RF->getHeadEntry();
+      if (!EntryOrErr)
+        error(EntryOrErr.takeError());
+      ResourceEntryRef Entry = EntryOrErr.get();
+      bool End = false;
+      while (!End) {
+        error(Entry.moveNext(End));
+        EntryNumber++;
+      }
+      outs() << "Number of resources: " << EntryNumber << "\n";
     }
-    outs() << "Number of resources: " << EntryNumber << "\n";
 
     error(Parser.parse(RF));
   }
 
-  Parser.printTree();
+  if (Verbose)
+    Parser.printTree();
+
+  error(
+      llvm::object::writeWindowsResourceCOFF(OutputFile, MachineType, Parser));
 
   return 0;
 }
diff --git a/tools/llvm-cvtres/llvm-cvtres.h b/tools/llvm-cvtres/llvm-cvtres.h
index 2e45b66461f0..f7b14faeebe3 100644
--- a/tools/llvm-cvtres/llvm-cvtres.h
+++ b/tools/llvm-cvtres/llvm-cvtres.h
@@ -14,6 +14,4 @@
 
 void error(std::error_code EC);
 
-enum class machine { UNKNOWN = 0, ARM, X64, X86 };
-
 #endif
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 2b5babe79824..1da157c4e4d5 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -67,6 +67,7 @@ static cl::opt<DIDumpType> DumpType(
         clEnumValN(DIDT_GnuPubnames, "gnu_pubnames", ".debug_gnu_pubnames"),
         clEnumValN(DIDT_GnuPubtypes, "gnu_pubtypes", ".debug_gnu_pubtypes"),
         clEnumValN(DIDT_Str, "str", ".debug_str"),
+        clEnumValN(DIDT_StrOffsets, "str_offsets", ".debug_str_offsets"),
         clEnumValN(DIDT_StrDwo, "str.dwo", ".debug_str.dwo"),
         clEnumValN(DIDT_StrOffsetsDwo, "str_offsets.dwo",
                    ".debug_str_offsets.dwo"),
@@ -83,6 +84,8 @@ static cl::opt<bool> Verify("verify", cl::desc("Verify the DWARF debug info"));
 static cl::opt<bool> Quiet("quiet",
                            cl::desc("Use with -verify to not emit to STDOUT."));
 
+static cl::opt<bool> Brief("brief", cl::desc("Print fewer low-level details"));
+
 static void error(StringRef Filename, std::error_code EC) {
   if (!EC)
     return;
@@ -100,6 +103,7 @@ static void DumpObjectFile(ObjectFile &Obj, Twine Filename) {
   DIDumpOptions DumpOpts;
   DumpOpts.DumpType = DumpType;
   DumpOpts.SummarizeTypes = SummarizeTypes;
+  DumpOpts.Brief = Brief;
   DICtx->dump(outs(), DumpOpts);
 }
 
diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp
index 89f85157e1df..bbfece517c80 100644
--- a/tools/llvm-lto2/llvm-lto2.cpp
+++ b/tools/llvm-lto2/llvm-lto2.cpp
@@ -162,6 +162,8 @@ static int run(int argc, char **argv) {
         Res.FinalDefinitionInLinkageUnit = true;
       else if (C == 'x')
         Res.VisibleToRegularObj = true;
+      else if (C == 'r')
+        Res.LinkerRedefined = true;
       else {
         llvm::errs() << "invalid character " << C << " in resolution: " << R
                      << '\n';
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 87efac2d33cf..8782588dfdd8 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -56,17 +56,15 @@ static cl::opt<bool> RelaxELFRel(
     "relax-relocations", cl::init(true),
     cl::desc("Emit R_X86_64_GOTPCRELX instead of R_X86_64_GOTPCREL"));
 
-static cl::opt<DebugCompressionType>
-CompressDebugSections("compress-debug-sections", cl::ValueOptional,
-  cl::init(DebugCompressionType::DCT_None),
-  cl::desc("Choose DWARF debug sections compression:"),
-  cl::values(
-    clEnumValN(DebugCompressionType::DCT_None, "none",
-      "No compression"),
-    clEnumValN(DebugCompressionType::DCT_Zlib, "zlib",
-      "Use zlib compression"),
-    clEnumValN(DebugCompressionType::DCT_ZlibGnu, "zlib-gnu",
-      "Use zlib-gnu compression (deprecated)")));
+static cl::opt<DebugCompressionType> CompressDebugSections(
+    "compress-debug-sections", cl::ValueOptional,
+    cl::init(DebugCompressionType::None),
+    cl::desc("Choose DWARF debug sections compression:"),
+    cl::values(clEnumValN(DebugCompressionType::None, "none", "No compression"),
+               clEnumValN(DebugCompressionType::Z, "zlib",
+                          "Use zlib compression"),
+               clEnumValN(DebugCompressionType::GNU, "zlib-gnu",
+                          "Use zlib-gnu compression (deprecated)")));
 
 static cl::opt<bool>
 ShowInst("show-inst", cl::desc("Show internal instruction representation"));
@@ -494,7 +492,7 @@ int main(int argc, char **argv) {
 
   MAI->setRelaxELFRelocations(RelaxELFRel);
 
-  if (CompressDebugSections != DebugCompressionType::DCT_None) {
+  if (CompressDebugSections != DebugCompressionType::None) {
     if (!zlib::isAvailable()) {
       errs() << ProgName
              << ": build tools with zlib to enable -compress-debug-sections";
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index b022c300756d..722cb9e7e449 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -31,7 +32,6 @@
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/Wasm.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index a260d6ff42c5..8927f57cc97f 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Object/MachO.h"
 #include "llvm-objdump.h"
 #include "llvm-c/Disassembler.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Config/config.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -30,6 +30,7 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachO.h"
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
@@ -39,7 +40,6 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
@@ -2594,7 +2594,8 @@ static const char *get_symbol_32(uint32_t sect_offset, SectionRef S,
 
 // These are structs in the Objective-C meta data and read to produce the
 // comments for disassembly.  While these are part of the ABI they are no
-// public defintions.  So the are here not in include/llvm/Support/MachO.h .
+// public definitions.  So they are here, not in
+// include/llvm/BinaryFormat/MachO.h.
 
 // The cfstring object in a 64-bit Mach-O file.
 struct cfstring64_t {
diff --git a/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp b/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
deleted file mode 100644
index 78971eb5879a..000000000000
--- a/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-//===- C13DebugFragmentVisitor.cpp -------------------------------*- C++-*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "C13DebugFragmentVisitor.h"
-
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-using namespace llvm::pdb;
-
-C13DebugFragmentVisitor::C13DebugFragmentVisitor(PDBFile &F) : F(F) {}
-
-C13DebugFragmentVisitor::~C13DebugFragmentVisitor() {}
-
-Error C13DebugFragmentVisitor::visitUnknown(
-    codeview::DebugUnknownSubsectionRef &Fragment) {
-  return Error::success();
-}
-
-Error C13DebugFragmentVisitor::visitFileChecksums(
-    codeview::DebugChecksumsSubsectionRef &Checksums) {
-  assert(!this->Checksums.hasValue());
-  this->Checksums = Checksums;
-  return Error::success();
-}
-
-Error C13DebugFragmentVisitor::visitLines(
-    codeview::DebugLinesSubsectionRef &Lines) {
-  this->Lines.push_back(Lines);
-  return Error::success();
-}
-
-Error C13DebugFragmentVisitor::visitInlineeLines(
-    codeview::DebugInlineeLinesSubsectionRef &Lines) {
-  this->InlineeLines.push_back(Lines);
-  return Error::success();
-}
-
-Error C13DebugFragmentVisitor::finished() {
-  if (!Checksums.hasValue()) {
-    assert(Lines.empty());
-    return Error::success();
-  }
-  if (auto EC = handleFileChecksums())
-    return EC;
-
-  if (auto EC = handleLines())
-    return EC;
-
-  if (auto EC = handleInlineeLines())
-    return EC;
-
-  return Error::success();
-}
-
-Expected<StringRef>
-C13DebugFragmentVisitor::getNameFromStringTable(uint32_t Offset) {
-  auto ST = F.getStringTable();
-  if (!ST)
-    return ST.takeError();
-
-  return ST->getStringForID(Offset);
-}
-
-Expected<StringRef>
-C13DebugFragmentVisitor::getNameFromChecksumsBuffer(uint32_t Offset) {
-  assert(Checksums.hasValue());
-
-  auto Array = Checksums->getArray();
-  auto ChecksumIter = Array.at(Offset);
-  if (ChecksumIter == Array.end())
-    return make_error<RawError>(raw_error_code::invalid_format);
-  const auto &Entry = *ChecksumIter;
-  return getNameFromStringTable(Entry.FileNameOffset);
-}
diff --git a/tools/llvm-pdbdump/C13DebugFragmentVisitor.h b/tools/llvm-pdbdump/C13DebugFragmentVisitor.h
deleted file mode 100644
index a12f282c4c5c..000000000000
--- a/tools/llvm-pdbdump/C13DebugFragmentVisitor.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===- C13DebugFragmentVisitor.h - Visitor for CodeView Info ----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVMPDBDUMP_C13DEBUGFRAGMENTVISITOR_H
-#define LLVM_TOOLS_LLVMPDBDUMP_C13DEBUGFRAGMENTVISITOR_H
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
-#include "llvm/Support/Error.h"
-
-#include <vector>
-
-namespace llvm {
-
-namespace pdb {
-
-class PDBFile;
-
-class C13DebugFragmentVisitor : public codeview::DebugSubsectionVisitor {
-public:
-  C13DebugFragmentVisitor(PDBFile &F);
-  ~C13DebugFragmentVisitor();
-
-  Error visitUnknown(codeview::DebugUnknownSubsectionRef &Fragment) final;
-
-  Error
-  visitFileChecksums(codeview::DebugChecksumsSubsectionRef &Checksums) final;
-
-  Error visitLines(codeview::DebugLinesSubsectionRef &Lines) final;
-
-  Error
-  visitInlineeLines(codeview::DebugInlineeLinesSubsectionRef &Lines) final;
-
-  Error finished() final;
-
-protected:
-  virtual Error handleFileChecksums() { return Error::success(); }
-  virtual Error handleLines() { return Error::success(); }
-  virtual Error handleInlineeLines() { return Error::success(); }
-
-  Expected<StringRef> getNameFromStringTable(uint32_t Offset);
-  Expected<StringRef> getNameFromChecksumsBuffer(uint32_t Offset);
-
-  Optional<codeview::DebugChecksumsSubsectionRef> Checksums;
-  std::vector<codeview::DebugInlineeLinesSubsectionRef> InlineeLines;
-  std::vector<codeview::DebugLinesSubsectionRef> Lines;
-
-  PDBFile &F;
-};
-}
-}
-
-#endif
diff --git a/tools/llvm-pdbdump/Analyze.cpp b/tools/llvm-pdbutil/Analyze.cpp
similarity index 100%
rename from tools/llvm-pdbdump/Analyze.cpp
rename to tools/llvm-pdbutil/Analyze.cpp
diff --git a/tools/llvm-pdbdump/Analyze.h b/tools/llvm-pdbutil/Analyze.h
similarity index 100%
rename from tools/llvm-pdbdump/Analyze.h
rename to tools/llvm-pdbutil/Analyze.h
diff --git a/tools/llvm-pdbdump/CMakeLists.txt b/tools/llvm-pdbutil/CMakeLists.txt
similarity index 89%
rename from tools/llvm-pdbdump/CMakeLists.txt
rename to tools/llvm-pdbutil/CMakeLists.txt
index a1f54a3bff6a..9875dfb5a257 100644
--- a/tools/llvm-pdbdump/CMakeLists.txt
+++ b/tools/llvm-pdbutil/CMakeLists.txt
@@ -7,12 +7,11 @@ set(LLVM_LINK_COMPONENTS
   Support
   )
 
-add_llvm_tool(llvm-pdbdump
+add_llvm_tool(llvm-pdbutil
   Analyze.cpp
-  C13DebugFragmentVisitor.cpp
   CompactTypeDumpVisitor.cpp
   Diff.cpp
-  llvm-pdbdump.cpp
+  llvm-pdbutil.cpp
   LinePrinter.cpp
   LLVMOutputStyle.cpp
   PdbYaml.cpp
diff --git a/tools/llvm-pdbdump/CompactTypeDumpVisitor.cpp b/tools/llvm-pdbutil/CompactTypeDumpVisitor.cpp
similarity index 100%
rename from tools/llvm-pdbdump/CompactTypeDumpVisitor.cpp
rename to tools/llvm-pdbutil/CompactTypeDumpVisitor.cpp
diff --git a/tools/llvm-pdbdump/CompactTypeDumpVisitor.h b/tools/llvm-pdbutil/CompactTypeDumpVisitor.h
similarity index 100%
rename from tools/llvm-pdbdump/CompactTypeDumpVisitor.h
rename to tools/llvm-pdbutil/CompactTypeDumpVisitor.h
diff --git a/tools/llvm-pdbdump/Diff.cpp b/tools/llvm-pdbutil/Diff.cpp
similarity index 99%
rename from tools/llvm-pdbdump/Diff.cpp
rename to tools/llvm-pdbutil/Diff.cpp
index 418c2361ac32..3fe6c511d35f 100644
--- a/tools/llvm-pdbdump/Diff.cpp
+++ b/tools/llvm-pdbutil/Diff.cpp
@@ -10,7 +10,7 @@
 #include "Diff.h"
 
 #include "StreamUtil.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/Native/Formatters.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
diff --git a/tools/llvm-pdbdump/Diff.h b/tools/llvm-pdbutil/Diff.h
similarity index 100%
rename from tools/llvm-pdbdump/Diff.h
rename to tools/llvm-pdbutil/Diff.h
diff --git a/tools/llvm-pdbdump/LLVMBuild.txt b/tools/llvm-pdbutil/LLVMBuild.txt
similarity index 88%
rename from tools/llvm-pdbdump/LLVMBuild.txt
rename to tools/llvm-pdbutil/LLVMBuild.txt
index 4043e13ceaec..adbe4993e3fa 100644
--- a/tools/llvm-pdbdump/LLVMBuild.txt
+++ b/tools/llvm-pdbutil/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./tools/llvm-pdbdump/LLVMBuild.txt -----------------------*- Conf -*--===;
+;===- ./tools/llvm-pdbutil/LLVMBuild.txt -----------------------*- Conf -*--===;
 ;
 ;                     The LLVM Compiler Infrastructure
 ;
@@ -17,7 +17,7 @@
 
 [component_0]
 type = Tool
-name = llvm-pdbdump
+name = llvm-pdbutil
 parent = Tools
 required_libraries = DebugInfoMSF DebugInfoPDB
 
diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/tools/llvm-pdbutil/LLVMOutputStyle.cpp
similarity index 75%
rename from tools/llvm-pdbdump/LLVMOutputStyle.cpp
rename to tools/llvm-pdbutil/LLVMOutputStyle.cpp
index 31c342cd0f5a..824f88f8efd0 100644
--- a/tools/llvm-pdbdump/LLVMOutputStyle.cpp
+++ b/tools/llvm-pdbutil/LLVMOutputStyle.cpp
@@ -9,16 +9,21 @@
 
 #include "LLVMOutputStyle.h"
 
-#include "C13DebugFragmentVisitor.h"
 #include "CompactTypeDumpVisitor.h"
 #include "StreamUtil.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
+#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h"
 #include "llvm/DebugInfo/CodeView/EnumTables.h"
 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
@@ -81,62 +86,74 @@ struct PageStats {
   BitVector UseAfterFreePages;
 };
 
-class C13RawVisitor : public C13DebugFragmentVisitor {
+class C13RawVisitor : public DebugSubsectionVisitor {
 public:
-  C13RawVisitor(ScopedPrinter &P, PDBFile &F, LazyRandomTypeCollection &IPI)
-      : C13DebugFragmentVisitor(F), P(P), IPI(IPI) {}
+  C13RawVisitor(ScopedPrinter &P, LazyRandomTypeCollection &TPI,
+                LazyRandomTypeCollection &IPI)
+      : P(P), TPI(TPI), IPI(IPI) {}
 
-  Error handleLines() override {
-    if (Lines.empty())
+  Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::Unknown))
+      return Error::success();
+    DictScope DD(P, "Unknown");
+    P.printHex("Kind", static_cast<uint32_t>(Unknown.kind()));
+    ArrayRef<uint8_t> Data;
+    BinaryStreamReader Reader(Unknown.getData());
+    consumeError(Reader.readBytes(Data, Reader.bytesRemaining()));
+    P.printBinaryBlock("Data", Data);
+    return Error::success();
+  }
+
+  Error visitLines(DebugLinesSubsectionRef &Lines,
+                   const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::Lines))
       return Error::success();
 
     DictScope DD(P, "Lines");
 
-    for (const auto &Fragment : Lines) {
-      DictScope DDD(P, "Block");
-      P.printNumber("RelocSegment", Fragment.header()->RelocSegment);
-      P.printNumber("RelocOffset", Fragment.header()->RelocOffset);
-      P.printNumber("CodeSize", Fragment.header()->CodeSize);
-      P.printBoolean("HasColumns", Fragment.hasColumnInfo());
+    P.printNumber("RelocSegment", Lines.header()->RelocSegment);
+    P.printNumber("RelocOffset", Lines.header()->RelocOffset);
+    P.printNumber("CodeSize", Lines.header()->CodeSize);
+    P.printBoolean("HasColumns", Lines.hasColumnInfo());
 
-      for (const auto &L : Fragment) {
-        DictScope DDDD(P, "Lines");
+    for (const auto &L : Lines) {
+      DictScope DDDD(P, "FileEntry");
 
-        if (auto EC = printFileName("FileName", L.NameIndex))
-          return EC;
+      if (auto EC = printFileName("FileName", L.NameIndex, State))
+        return EC;
 
-        for (const auto &N : L.LineNumbers) {
-          DictScope DDD(P, "Line");
-          LineInfo LI(N.Flags);
-          P.printNumber("Offset", N.Offset);
-          if (LI.isAlwaysStepInto())
-            P.printString("StepInto", StringRef("Always"));
-          else if (LI.isNeverStepInto())
-            P.printString("StepInto", StringRef("Never"));
-          else
-            P.printNumber("LineNumberStart", LI.getStartLine());
-          P.printNumber("EndDelta", LI.getLineDelta());
-          P.printBoolean("IsStatement", LI.isStatement());
-        }
-        for (const auto &C : L.Columns) {
-          DictScope DDD(P, "Column");
-          P.printNumber("Start", C.StartColumn);
-          P.printNumber("End", C.EndColumn);
-        }
+      for (const auto &N : L.LineNumbers) {
+        DictScope DDD(P, "Line");
+        LineInfo LI(N.Flags);
+        P.printNumber("Offset", N.Offset);
+        if (LI.isAlwaysStepInto())
+          P.printString("StepInto", StringRef("Always"));
+        else if (LI.isNeverStepInto())
+          P.printString("StepInto", StringRef("Never"));
+        else
+          P.printNumber("LineNumberStart", LI.getStartLine());
+        P.printNumber("EndDelta", LI.getLineDelta());
+        P.printBoolean("IsStatement", LI.isStatement());
+      }
+      for (const auto &C : L.Columns) {
+        DictScope DDD(P, "Column");
+        P.printNumber("Start", C.StartColumn);
+        P.printNumber("End", C.EndColumn);
       }
     }
 
     return Error::success();
   }
 
-  Error handleFileChecksums() override {
-    if (!Checksums.hasValue())
+  Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums,
+                           const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::FileChecksums))
       return Error::success();
 
     DictScope DD(P, "FileChecksums");
-    for (const auto &CS : *Checksums) {
+    for (const auto &CS : Checksums) {
       DictScope DDD(P, "Checksum");
-      if (auto Result = getNameFromStringTable(CS.FileNameOffset))
+      if (auto Result = getNameFromStringTable(CS.FileNameOffset, State))
         P.printString("FileName", *Result);
       else
         return Result.takeError();
@@ -146,34 +163,140 @@ public:
     return Error::success();
   }
 
-  Error handleInlineeLines() override {
-    if (InlineeLines.empty())
+  Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees,
+                          const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::InlineeLines))
       return Error::success();
 
     DictScope D(P, "InlineeLines");
-    for (const auto &IL : InlineeLines) {
-      P.printBoolean("HasExtraFiles", IL.hasExtraFiles());
-      ListScope LS(P, "Lines");
-      for (const auto &L : IL) {
-        DictScope DDD(P, "Inlinee");
-        if (auto EC = printFileName("FileName", L.Header->FileID))
-          return EC;
+    P.printBoolean("HasExtraFiles", Inlinees.hasExtraFiles());
+    ListScope LS(P, "Lines");
+    for (const auto &L : Inlinees) {
+      DictScope DDD(P, "Inlinee");
+      if (auto EC = printFileName("FileName", L.Header->FileID, State))
+        return EC;
 
-        if (auto EC = dumpTypeRecord("Function", L.Header->Inlinee))
-          return EC;
-        P.printNumber("SourceLine", L.Header->SourceLineNum);
-        if (IL.hasExtraFiles()) {
-          ListScope DDDD(P, "ExtraFiles");
-          for (const auto &EF : L.ExtraFiles) {
-            if (auto EC = printFileName("File", EF))
-              return EC;
-          }
+      if (auto EC = dumpTypeRecord("Function", L.Header->Inlinee))
+        return EC;
+      P.printNumber("SourceLine", L.Header->SourceLineNum);
+      if (Inlinees.hasExtraFiles()) {
+        ListScope DDDD(P, "ExtraFiles");
+        for (const auto &EF : L.ExtraFiles) {
+          if (auto EC = printFileName("File", EF, State))
+            return EC;
         }
       }
     }
     return Error::success();
   }
 
+  Error visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &CSE,
+                                const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::CrossScopeExports))
+      return Error::success();
+
+    ListScope D(P, "CrossModuleExports");
+    for (const auto &M : CSE) {
+      DictScope D(P, "Export");
+      P.printHex("Local", M.Local);
+      P.printHex("Global", M.Global);
+    }
+    return Error::success();
+  }
+
+  Error visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &CSI,
+                                const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::CrossScopeImports))
+      return Error::success();
+
+    ListScope L(P, "CrossModuleImports");
+    for (const auto &M : CSI) {
+      DictScope D(P, "ModuleImport");
+      auto Name = getNameFromStringTable(M.Header->ModuleNameOffset, State);
+      if (!Name)
+        return Name.takeError();
+      P.printString("Module", *Name);
+      P.printHexList("Imports", M.Imports);
+    }
+    return Error::success();
+  }
+
+  Error visitFrameData(DebugFrameDataSubsectionRef &FD,
+                       const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::FrameData))
+      return Error::success();
+
+    ListScope L(P, "FrameData");
+    for (const auto &Frame : FD) {
+      DictScope D(P, "Frame");
+      auto Name = getNameFromStringTable(Frame.FrameFunc, State);
+      if (!Name)
+        return joinErrors(make_error<RawError>(raw_error_code::invalid_format,
+                                               "Invalid Frame.FrameFunc index"),
+                          Name.takeError());
+      P.printNumber("Rva", Frame.RvaStart);
+      P.printNumber("CodeSize", Frame.CodeSize);
+      P.printNumber("LocalSize", Frame.LocalSize);
+      P.printNumber("ParamsSize", Frame.ParamsSize);
+      P.printNumber("MaxStackSize", Frame.MaxStackSize);
+      P.printString("FrameFunc", *Name);
+      P.printNumber("PrologSize", Frame.PrologSize);
+      P.printNumber("SavedRegsSize", Frame.SavedRegsSize);
+      P.printNumber("Flags", Frame.Flags);
+    }
+    return Error::success();
+  }
+
+  Error visitSymbols(DebugSymbolsSubsectionRef &Symbols,
+                     const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::Symbols))
+      return Error::success();
+    ListScope L(P, "Symbols");
+
+    // This section should not actually appear in a PDB file, it really only
+    // appears in object files.  But we support it here for testing.  So we
+    // specify the Object File container type.
+    codeview::CVSymbolDumper SD(P, TPI, CodeViewContainer::ObjectFile, nullptr,
+                                false);
+    for (auto S : Symbols) {
+      DictScope LL(P, "");
+      if (auto EC = SD.dump(S)) {
+        return make_error<RawError>(
+            raw_error_code::corrupt_file,
+            "DEBUG_S_SYMBOLS subsection contained corrupt symbol record");
+      }
+    }
+    return Error::success();
+  }
+
+  Error visitStringTable(DebugStringTableSubsectionRef &Strings,
+                         const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::StringTable))
+      return Error::success();
+
+    ListScope D(P, "String Table");
+    BinaryStreamReader Reader(Strings.getBuffer());
+    StringRef S;
+    consumeError(Reader.readCString(S));
+    while (Reader.bytesRemaining() > 0) {
+      consumeError(Reader.readCString(S));
+      if (S.empty() && Reader.bytesRemaining() < 4)
+        break;
+      P.printString(S);
+    }
+    return Error::success();
+  }
+
+  Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &RVAs,
+                            const DebugSubsectionState &State) override {
+    if (!opts::checkModuleSubsection(opts::ModuleSubsection::CoffSymbolRVAs))
+      return Error::success();
+
+    ListScope D(P, "COFF Symbol RVAs");
+    P.printHexList("RVAs", RVAs);
+    return Error::success();
+  }
+
 private:
   Error dumpTypeRecord(StringRef Label, TypeIndex Index) {
     CompactTypeDumpVisitor CTDV(IPI, Index, &P);
@@ -189,15 +312,33 @@ private:
     }
     return Error::success();
   }
-  Error printFileName(StringRef Label, uint32_t Offset) {
-    if (auto Result = getNameFromChecksumsBuffer(Offset)) {
+  Error printFileName(StringRef Label, uint32_t Offset,
+                      const DebugSubsectionState &State) {
+    if (auto Result = getNameFromChecksumsBuffer(Offset, State)) {
       P.printString(Label, *Result);
       return Error::success();
     } else
       return Result.takeError();
   }
 
+  Expected<StringRef>
+  getNameFromStringTable(uint32_t Offset, const DebugSubsectionState &State) {
+    return State.strings().getString(Offset);
+  }
+
+  Expected<StringRef>
+  getNameFromChecksumsBuffer(uint32_t Offset,
+                             const DebugSubsectionState &State) {
+    auto Array = State.checksums().getArray();
+    auto ChecksumIter = Array.at(Offset);
+    if (ChecksumIter == Array.end())
+      return make_error<RawError>(raw_error_code::invalid_format);
+    const auto &Entry = *ChecksumIter;
+    return getNameFromStringTable(Entry.FileNameOffset, State);
+  }
+
   ScopedPrinter &P;
+  LazyRandomTypeCollection &TPI;
   LazyRandomTypeCollection &IPI;
 };
 }
@@ -727,8 +868,10 @@ LLVMOutputStyle::initializeTypeDatabase(uint32_t SN) {
 }
 
 Error LLVMOutputStyle::dumpDbiStream() {
-  bool DumpModules = opts::raw::DumpModules || opts::raw::DumpModuleSyms ||
-                     opts::raw::DumpModuleFiles || opts::raw::DumpLineInfo;
+  bool DumpModules = opts::shared::DumpModules ||
+                     opts::shared::DumpModuleSyms ||
+                     opts::shared::DumpModuleFiles ||
+                     !opts::shared::DumpModuleSubsections.empty();
   if (!opts::raw::DumpHeaders && !DumpModules)
     return Error::success();
   if (!File.hasPDBDbiStream()) {
@@ -778,7 +921,7 @@ Error LLVMOutputStyle::dumpDbiStream() {
       P.printNumber("Symbol Byte Size", Modi.getSymbolDebugInfoByteSize());
       P.printNumber("Type Server Index", Modi.getTypeServerIndex());
       P.printBoolean("Has EC Info", Modi.hasECInfo());
-      if (opts::raw::DumpModuleFiles) {
+      if (opts::shared::DumpModuleFiles) {
         std::string FileListName = to_string(Modules.getSourceFileCount(I)) +
                                    " Contributing Source Files";
         ListScope LL(P, FileListName);
@@ -787,8 +930,9 @@ Error LLVMOutputStyle::dumpDbiStream() {
       }
       bool HasModuleDI = (Modi.getModuleStreamIndex() < File.getNumStreams());
       bool ShouldDumpSymbols =
-          (opts::raw::DumpModuleSyms || opts::raw::DumpSymRecordBytes);
-      if (HasModuleDI && (ShouldDumpSymbols || opts::raw::DumpLineInfo)) {
+          (opts::shared::DumpModuleSyms || opts::raw::DumpSymRecordBytes);
+      if (HasModuleDI &&
+          (ShouldDumpSymbols || !opts::shared::DumpModuleSubsections.empty())) {
         auto ModStreamData = MappedBlockStream::createIndexedStream(
             File.getMsfLayout(), File.getMsfBuffer(),
             Modi.getModuleStreamIndex(), File.getAllocator());
@@ -797,19 +941,19 @@ Error LLVMOutputStyle::dumpDbiStream() {
         if (auto EC = ModS.reload())
           return EC;
 
+        auto ExpectedTpi = initializeTypeDatabase(StreamTPI);
+        if (!ExpectedTpi)
+          return ExpectedTpi.takeError();
+        auto &Tpi = *ExpectedTpi;
         if (ShouldDumpSymbols) {
-          auto ExpectedTypes = initializeTypeDatabase(StreamTPI);
-          if (!ExpectedTypes)
-            return ExpectedTypes.takeError();
-          auto &Types = *ExpectedTypes;
 
           ListScope SS(P, "Symbols");
-          codeview::CVSymbolDumper SD(P, Types, CodeViewContainer::Pdb, nullptr,
+          codeview::CVSymbolDumper SD(P, Tpi, CodeViewContainer::Pdb, nullptr,
                                       false);
           bool HadError = false;
           for (auto S : ModS.symbols(&HadError)) {
             DictScope LL(P, "");
-            if (opts::raw::DumpModuleSyms) {
+            if (opts::shared::DumpModuleSyms) {
               if (auto EC = SD.dump(S)) {
                 llvm::consumeError(std::move(EC));
                 HadError = true;
@@ -824,14 +968,22 @@ Error LLVMOutputStyle::dumpDbiStream() {
                 raw_error_code::corrupt_file,
                 "DBI stream contained corrupt symbol record");
         }
-        if (opts::raw::DumpLineInfo) {
-          ListScope SS(P, "LineInfo");
-          auto ExpectedTypes = initializeTypeDatabase(StreamIPI);
-          if (!ExpectedTypes)
-            return ExpectedTypes.takeError();
-          auto &IpiItems = *ExpectedTypes;
-          C13RawVisitor V(P, File, IpiItems);
-          if (auto EC = codeview::visitDebugSubsections(ModS.subsections(), V))
+        if (!opts::shared::DumpModuleSubsections.empty()) {
+          ListScope SS(P, "Subsections");
+          auto ExpectedIpi = initializeTypeDatabase(StreamIPI);
+          if (!ExpectedIpi)
+            return ExpectedIpi.takeError();
+          auto &Ipi = *ExpectedIpi;
+          auto ExpectedStrings = File.getStringTable();
+          if (!ExpectedStrings)
+            return joinErrors(
+                make_error<RawError>(raw_error_code::no_stream,
+                                     "Could not get string table!"),
+                ExpectedStrings.takeError());
+
+          C13RawVisitor V(P, Tpi, Ipi);
+          if (auto EC = codeview::visitDebugSubsections(
+                  ModS.subsections(), V, ExpectedStrings->getStringTable()))
             return EC;
         }
       }
diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.h b/tools/llvm-pdbutil/LLVMOutputStyle.h
similarity index 100%
rename from tools/llvm-pdbdump/LLVMOutputStyle.h
rename to tools/llvm-pdbutil/LLVMOutputStyle.h
diff --git a/tools/llvm-pdbdump/LinePrinter.cpp b/tools/llvm-pdbutil/LinePrinter.cpp
similarity index 99%
rename from tools/llvm-pdbdump/LinePrinter.cpp
rename to tools/llvm-pdbutil/LinePrinter.cpp
index 7fa524400aef..ef56b5fe8e6a 100644
--- a/tools/llvm-pdbdump/LinePrinter.cpp
+++ b/tools/llvm-pdbutil/LinePrinter.cpp
@@ -9,7 +9,7 @@
 
 #include "LinePrinter.h"
 
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/DebugInfo/PDB/UDTLayout.h"
diff --git a/tools/llvm-pdbdump/LinePrinter.h b/tools/llvm-pdbutil/LinePrinter.h
similarity index 100%
rename from tools/llvm-pdbdump/LinePrinter.h
rename to tools/llvm-pdbutil/LinePrinter.h
diff --git a/tools/llvm-pdbdump/OutputStyle.h b/tools/llvm-pdbutil/OutputStyle.h
similarity index 100%
rename from tools/llvm-pdbdump/OutputStyle.h
rename to tools/llvm-pdbutil/OutputStyle.h
diff --git a/tools/llvm-pdbdump/PdbYaml.cpp b/tools/llvm-pdbutil/PdbYaml.cpp
similarity index 100%
rename from tools/llvm-pdbdump/PdbYaml.cpp
rename to tools/llvm-pdbutil/PdbYaml.cpp
diff --git a/tools/llvm-pdbdump/PdbYaml.h b/tools/llvm-pdbutil/PdbYaml.h
similarity index 100%
rename from tools/llvm-pdbdump/PdbYaml.h
rename to tools/llvm-pdbutil/PdbYaml.h
diff --git a/tools/llvm-pdbdump/PrettyBuiltinDumper.cpp b/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
similarity index 98%
rename from tools/llvm-pdbdump/PrettyBuiltinDumper.cpp
rename to tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
index 591d5e70cfd6..fcda312e65e9 100644
--- a/tools/llvm-pdbdump/PrettyBuiltinDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
@@ -9,7 +9,7 @@
 
 #include "PrettyBuiltinDumper.h"
 #include "LinePrinter.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
 
diff --git a/tools/llvm-pdbdump/PrettyBuiltinDumper.h b/tools/llvm-pdbutil/PrettyBuiltinDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyBuiltinDumper.h
rename to tools/llvm-pdbutil/PrettyBuiltinDumper.h
diff --git a/tools/llvm-pdbdump/PrettyClassDefinitionDumper.cpp b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
similarity index 99%
rename from tools/llvm-pdbdump/PrettyClassDefinitionDumper.cpp
rename to tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
index 90f7772001d7..651cb8b7649e 100644
--- a/tools/llvm-pdbdump/PrettyClassDefinitionDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
@@ -11,7 +11,7 @@
 
 #include "LinePrinter.h"
 #include "PrettyClassLayoutGraphicalDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallString.h"
diff --git a/tools/llvm-pdbdump/PrettyClassDefinitionDumper.h b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyClassDefinitionDumper.h
rename to tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
diff --git a/tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.cpp b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
similarity index 99%
rename from tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.cpp
rename to tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
index d11472679626..54e33683f552 100644
--- a/tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
@@ -16,7 +16,7 @@
 #include "PrettyTypedefDumper.h"
 #include "PrettyVariableDumper.h"
 #include "PrettyVariableDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
diff --git a/tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.h b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.h
rename to tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
diff --git a/tools/llvm-pdbdump/PrettyCompilandDumper.cpp b/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
similarity index 98%
rename from tools/llvm-pdbdump/PrettyCompilandDumper.cpp
rename to tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index 9cf7bf82a164..65e8badbc99a 100644
--- a/tools/llvm-pdbdump/PrettyCompilandDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -1,4 +1,4 @@
-//===- PrettyCompilandDumper.cpp - llvm-pdbdump compiland dumper -*- C++ *-===//
+//===- PrettyCompilandDumper.cpp - llvm-pdbutil compiland dumper -*- C++ *-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,7 +11,7 @@
 
 #include "LinePrinter.h"
 #include "PrettyFunctionDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
diff --git a/tools/llvm-pdbdump/PrettyCompilandDumper.h b/tools/llvm-pdbutil/PrettyCompilandDumper.h
similarity index 95%
rename from tools/llvm-pdbdump/PrettyCompilandDumper.h
rename to tools/llvm-pdbutil/PrettyCompilandDumper.h
index 2127e7d1f529..cae196e9d134 100644
--- a/tools/llvm-pdbdump/PrettyCompilandDumper.h
+++ b/tools/llvm-pdbutil/PrettyCompilandDumper.h
@@ -1,4 +1,4 @@
-//===- PrettyCompilandDumper.h - llvm-pdbdump compiland dumper -*- C++ --*-===//
+//===- PrettyCompilandDumper.h - llvm-pdbutil compiland dumper -*- C++ --*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/tools/llvm-pdbdump/PrettyEnumDumper.cpp b/tools/llvm-pdbutil/PrettyEnumDumper.cpp
similarity index 98%
rename from tools/llvm-pdbdump/PrettyEnumDumper.cpp
rename to tools/llvm-pdbutil/PrettyEnumDumper.cpp
index 965ca1b9f989..7aff5b93d986 100644
--- a/tools/llvm-pdbdump/PrettyEnumDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyEnumDumper.cpp
@@ -11,7 +11,7 @@
 
 #include "LinePrinter.h"
 #include "PrettyBuiltinDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
diff --git a/tools/llvm-pdbdump/PrettyEnumDumper.h b/tools/llvm-pdbutil/PrettyEnumDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyEnumDumper.h
rename to tools/llvm-pdbutil/PrettyEnumDumper.h
diff --git a/tools/llvm-pdbdump/PrettyExternalSymbolDumper.cpp b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
similarity index 100%
rename from tools/llvm-pdbdump/PrettyExternalSymbolDumper.cpp
rename to tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
diff --git a/tools/llvm-pdbdump/PrettyExternalSymbolDumper.h b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyExternalSymbolDumper.h
rename to tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
diff --git a/tools/llvm-pdbdump/PrettyFunctionDumper.cpp b/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
similarity index 99%
rename from tools/llvm-pdbdump/PrettyFunctionDumper.cpp
rename to tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index 8b2043989b81..06d72410359f 100644
--- a/tools/llvm-pdbdump/PrettyFunctionDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -10,7 +10,7 @@
 #include "PrettyFunctionDumper.h"
 #include "LinePrinter.h"
 #include "PrettyBuiltinDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
 #include "llvm/DebugInfo/PDB/PDBExtras.h"
diff --git a/tools/llvm-pdbdump/PrettyFunctionDumper.h b/tools/llvm-pdbutil/PrettyFunctionDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyFunctionDumper.h
rename to tools/llvm-pdbutil/PrettyFunctionDumper.h
diff --git a/tools/llvm-pdbdump/PrettyTypeDumper.cpp b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
similarity index 68%
rename from tools/llvm-pdbdump/PrettyTypeDumper.cpp
rename to tools/llvm-pdbutil/PrettyTypeDumper.cpp
index cd156f051573..0f6086395ad1 100644
--- a/tools/llvm-pdbdump/PrettyTypeDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
@@ -14,7 +14,7 @@
 #include "PrettyClassDefinitionDumper.h"
 #include "PrettyEnumDumper.h"
 #include "PrettyTypedefDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
@@ -135,80 +135,84 @@ filterAndSortClassDefs(LinePrinter &Printer, Enumerator &E,
 TypeDumper::TypeDumper(LinePrinter &P) : PDBSymDumper(true), Printer(P) {}
 
 void TypeDumper::start(const PDBSymbolExe &Exe) {
+  auto Children = Exe.findAllChildren();
   if (opts::pretty::Enums) {
-    auto Enums = Exe.findAllChildren<PDBSymbolTypeEnum>();
-    Printer.NewLine();
-    WithColor(Printer, PDB_ColorItem::Identifier).get() << "Enums";
-    Printer << ": (" << Enums->getChildCount() << " items)";
-    Printer.Indent();
-    while (auto Enum = Enums->getNext())
-      Enum->dump(*this);
-    Printer.Unindent();
+    if (auto Enums = Exe.findAllChildren<PDBSymbolTypeEnum>()) {
+      Printer.NewLine();
+      WithColor(Printer, PDB_ColorItem::Identifier).get() << "Enums";
+      Printer << ": (" << Enums->getChildCount() << " items)";
+      Printer.Indent();
+      while (auto Enum = Enums->getNext())
+        Enum->dump(*this);
+      Printer.Unindent();
+    }
   }
 
   if (opts::pretty::Typedefs) {
-    auto Typedefs = Exe.findAllChildren<PDBSymbolTypeTypedef>();
-    Printer.NewLine();
-    WithColor(Printer, PDB_ColorItem::Identifier).get() << "Typedefs";
-    Printer << ": (" << Typedefs->getChildCount() << " items)";
-    Printer.Indent();
-    while (auto Typedef = Typedefs->getNext())
-      Typedef->dump(*this);
-    Printer.Unindent();
+    if (auto Typedefs = Exe.findAllChildren<PDBSymbolTypeTypedef>()) {
+      Printer.NewLine();
+      WithColor(Printer, PDB_ColorItem::Identifier).get() << "Typedefs";
+      Printer << ": (" << Typedefs->getChildCount() << " items)";
+      Printer.Indent();
+      while (auto Typedef = Typedefs->getNext())
+        Typedef->dump(*this);
+      Printer.Unindent();
+    }
   }
 
   if (opts::pretty::Classes) {
-    auto Classes = Exe.findAllChildren<PDBSymbolTypeUDT>();
-    uint32_t All = Classes->getChildCount();
+    if (auto Classes = Exe.findAllChildren<PDBSymbolTypeUDT>()) {
+      uint32_t All = Classes->getChildCount();
 
-    Printer.NewLine();
-    WithColor(Printer, PDB_ColorItem::Identifier).get() << "Classes";
+      Printer.NewLine();
+      WithColor(Printer, PDB_ColorItem::Identifier).get() << "Classes";
 
-    bool Precompute = false;
-    Precompute =
-        (opts::pretty::ClassOrder != opts::pretty::ClassSortMode::None);
+      bool Precompute = false;
+      Precompute =
+          (opts::pretty::ClassOrder != opts::pretty::ClassSortMode::None);
 
-    // If we're using no sort mode, then we can start getting immediate output
-    // from the tool by just filtering as we go, rather than processing
-    // everything up front so that we can sort it.  This makes the tool more
-    // responsive.  So only precompute the filtered/sorted set of classes if
-    // necessary due to the specified options.
-    std::vector<LayoutPtr> Filtered;
-    uint32_t Shown = All;
-    if (Precompute) {
-      Filtered = filterAndSortClassDefs(Printer, *Classes, All);
+      // If we're using no sort mode, then we can start getting immediate output
+      // from the tool by just filtering as we go, rather than processing
+      // everything up front so that we can sort it.  This makes the tool more
+      // responsive.  So only precompute the filtered/sorted set of classes if
+      // necessary due to the specified options.
+      std::vector<LayoutPtr> Filtered;
+      uint32_t Shown = All;
+      if (Precompute) {
+        Filtered = filterAndSortClassDefs(Printer, *Classes, All);
 
-      Shown = Filtered.size();
-    }
-
-    Printer << ": (Showing " << Shown << " items";
-    if (Shown < All)
-      Printer << ", " << (All - Shown) << " filtered";
-    Printer << ")";
-    Printer.Indent();
-
-    // If we pre-computed, iterate the filtered/sorted list, otherwise iterate
-    // the DIA enumerator and filter on the fly.
-    if (Precompute) {
-      for (auto &Class : Filtered)
-        dumpClassLayout(*Class);
-    } else {
-      while (auto Class = Classes->getNext()) {
-        if (Class->getUnmodifiedTypeId() != 0)
-          continue;
-
-        if (Printer.IsTypeExcluded(Class->getName(), Class->getLength()))
-          continue;
-
-        auto Layout = llvm::make_unique<ClassLayout>(std::move(Class));
-        if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold)
-          continue;
-
-        dumpClassLayout(*Layout);
+        Shown = Filtered.size();
       }
-    }
 
-    Printer.Unindent();
+      Printer << ": (Showing " << Shown << " items";
+      if (Shown < All)
+        Printer << ", " << (All - Shown) << " filtered";
+      Printer << ")";
+      Printer.Indent();
+
+      // If we pre-computed, iterate the filtered/sorted list, otherwise iterate
+      // the DIA enumerator and filter on the fly.
+      if (Precompute) {
+        for (auto &Class : Filtered)
+          dumpClassLayout(*Class);
+      } else {
+        while (auto Class = Classes->getNext()) {
+          if (Class->getUnmodifiedTypeId() != 0)
+            continue;
+
+          if (Printer.IsTypeExcluded(Class->getName(), Class->getLength()))
+            continue;
+
+          auto Layout = llvm::make_unique<ClassLayout>(std::move(Class));
+          if (Layout->deepPaddingSize() < opts::pretty::PaddingThreshold)
+            continue;
+
+          dumpClassLayout(*Layout);
+        }
+      }
+
+      Printer.Unindent();
+    }
   }
 }
 
diff --git a/tools/llvm-pdbdump/PrettyTypeDumper.h b/tools/llvm-pdbutil/PrettyTypeDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyTypeDumper.h
rename to tools/llvm-pdbutil/PrettyTypeDumper.h
diff --git a/tools/llvm-pdbdump/PrettyTypedefDumper.cpp b/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
similarity index 99%
rename from tools/llvm-pdbdump/PrettyTypedefDumper.cpp
rename to tools/llvm-pdbutil/PrettyTypedefDumper.cpp
index 2d8e915d7604..2266e6ea2bef 100644
--- a/tools/llvm-pdbdump/PrettyTypedefDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
@@ -12,7 +12,7 @@
 #include "LinePrinter.h"
 #include "PrettyBuiltinDumper.h"
 #include "PrettyFunctionDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
 #include "llvm/DebugInfo/PDB/PDBExtras.h"
diff --git a/tools/llvm-pdbdump/PrettyTypedefDumper.h b/tools/llvm-pdbutil/PrettyTypedefDumper.h
similarity index 94%
rename from tools/llvm-pdbdump/PrettyTypedefDumper.h
rename to tools/llvm-pdbutil/PrettyTypedefDumper.h
index 34c139601301..133bbfb7db0e 100644
--- a/tools/llvm-pdbdump/PrettyTypedefDumper.h
+++ b/tools/llvm-pdbutil/PrettyTypedefDumper.h
@@ -1,4 +1,4 @@
-//===- PrettyTypedefDumper.h - llvm-pdbdump typedef dumper ---*- C++ ----*-===//
+//===- PrettyTypedefDumper.h - llvm-pdbutil typedef dumper ---*- C++ ----*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/tools/llvm-pdbdump/PrettyVariableDumper.cpp b/tools/llvm-pdbutil/PrettyVariableDumper.cpp
similarity index 99%
rename from tools/llvm-pdbdump/PrettyVariableDumper.cpp
rename to tools/llvm-pdbutil/PrettyVariableDumper.cpp
index 70925f4b03d0..4884fc8ee5a4 100644
--- a/tools/llvm-pdbdump/PrettyVariableDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyVariableDumper.cpp
@@ -12,7 +12,7 @@
 #include "LinePrinter.h"
 #include "PrettyBuiltinDumper.h"
 #include "PrettyFunctionDumper.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/PDB/IPDBSession.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
diff --git a/tools/llvm-pdbdump/PrettyVariableDumper.h b/tools/llvm-pdbutil/PrettyVariableDumper.h
similarity index 100%
rename from tools/llvm-pdbdump/PrettyVariableDumper.h
rename to tools/llvm-pdbutil/PrettyVariableDumper.h
diff --git a/tools/llvm-pdbdump/StreamUtil.cpp b/tools/llvm-pdbutil/StreamUtil.cpp
similarity index 100%
rename from tools/llvm-pdbdump/StreamUtil.cpp
rename to tools/llvm-pdbutil/StreamUtil.cpp
diff --git a/tools/llvm-pdbdump/StreamUtil.h b/tools/llvm-pdbutil/StreamUtil.h
similarity index 100%
rename from tools/llvm-pdbdump/StreamUtil.h
rename to tools/llvm-pdbutil/StreamUtil.h
diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/tools/llvm-pdbutil/YAMLOutputStyle.cpp
similarity index 84%
rename from tools/llvm-pdbdump/YAMLOutputStyle.cpp
rename to tools/llvm-pdbutil/YAMLOutputStyle.cpp
index ee72b90b12d1..58c538d968c8 100644
--- a/tools/llvm-pdbdump/YAMLOutputStyle.cpp
+++ b/tools/llvm-pdbutil/YAMLOutputStyle.cpp
@@ -9,15 +9,13 @@
 
 #include "YAMLOutputStyle.h"
 
-#include "C13DebugFragmentVisitor.h"
 #include "PdbYaml.h"
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsection.h"
-#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
 #include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h"
 #include "llvm/DebugInfo/CodeView/Line.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
@@ -39,39 +37,8 @@ YAMLOutputStyle::YAMLOutputStyle(PDBFile &File)
 }
 
 Error YAMLOutputStyle::dump() {
-  if (opts::pdb2yaml::All) {
-    opts::pdb2yaml::StreamMetadata = true;
-    opts::pdb2yaml::StreamDirectory = true;
-    opts::pdb2yaml::PdbStream = true;
-    opts::pdb2yaml::StringTable = true;
-    opts::pdb2yaml::DbiStream = true;
-    opts::pdb2yaml::DbiModuleInfo = true;
-    opts::pdb2yaml::DbiModuleSyms = true;
-    opts::pdb2yaml::DbiModuleSourceFileInfo = true;
-    opts::pdb2yaml::DbiModuleSourceLineInfo = true;
-    opts::pdb2yaml::TpiStream = true;
-    opts::pdb2yaml::IpiStream = true;
-  }
-
   if (opts::pdb2yaml::StreamDirectory)
     opts::pdb2yaml::StreamMetadata = true;
-  if (opts::pdb2yaml::DbiModuleSyms)
-    opts::pdb2yaml::DbiModuleInfo = true;
-
-  if (opts::pdb2yaml::DbiModuleSourceLineInfo)
-    opts::pdb2yaml::DbiModuleSourceFileInfo = true;
-
-  if (opts::pdb2yaml::DbiModuleSourceFileInfo)
-    opts::pdb2yaml::DbiModuleInfo = true;
-
-  if (opts::pdb2yaml::DbiModuleInfo)
-    opts::pdb2yaml::DbiStream = true;
-
-  // Some names from the module source file info get pulled from the string
-  // table, so if we're writing module source info, we have to write the string
-  // table as well.
-  if (opts::pdb2yaml::DbiModuleSourceLineInfo)
-    opts::pdb2yaml::StringTable = true;
 
   if (auto EC = dumpFileHeaders())
     return EC;
@@ -125,8 +92,8 @@ Error YAMLOutputStyle::dumpFileHeaders() {
 }
 
 Error YAMLOutputStyle::dumpStringTable() {
-  bool RequiresStringTable = opts::pdb2yaml::DbiModuleSourceFileInfo ||
-                             opts::pdb2yaml::DbiModuleSourceLineInfo;
+  bool RequiresStringTable = opts::shared::DumpModuleFiles ||
+                             !opts::shared::DumpModuleSubsections.empty();
   bool RequestedStringTable = opts::pdb2yaml::StringTable;
   if (!RequiresStringTable && !RequestedStringTable)
     return Error::success();
@@ -192,6 +159,30 @@ Error YAMLOutputStyle::dumpPDBStream() {
   return Error::success();
 }
 
+static opts::ModuleSubsection convertSubsectionKind(DebugSubsectionKind K) {
+  switch (K) {
+  case DebugSubsectionKind::CrossScopeExports:
+    return opts::ModuleSubsection::CrossScopeExports;
+  case DebugSubsectionKind::CrossScopeImports:
+    return opts::ModuleSubsection::CrossScopeImports;
+  case DebugSubsectionKind::FileChecksums:
+    return opts::ModuleSubsection::FileChecksums;
+  case DebugSubsectionKind::InlineeLines:
+    return opts::ModuleSubsection::InlineeLines;
+  case DebugSubsectionKind::Lines:
+    return opts::ModuleSubsection::Lines;
+  case DebugSubsectionKind::Symbols:
+    return opts::ModuleSubsection::Symbols;
+  case DebugSubsectionKind::StringTable:
+    return opts::ModuleSubsection::StringTable;
+  case DebugSubsectionKind::FrameData:
+    return opts::ModuleSubsection::FrameData;
+  default:
+    return opts::ModuleSubsection::Unknown;
+  }
+  llvm_unreachable("Unreachable!");
+}
+
 Error YAMLOutputStyle::dumpDbiStream() {
   if (!opts::pdb2yaml::DbiStream)
     return Error::success();
@@ -209,7 +200,7 @@ Error YAMLOutputStyle::dumpDbiStream() {
   Obj.DbiStream->PdbDllRbld = DS.getPdbDllRbld();
   Obj.DbiStream->PdbDllVersion = DS.getPdbDllVersion();
   Obj.DbiStream->VerHeader = DS.getDbiVersion();
-  if (opts::pdb2yaml::DbiModuleInfo) {
+  if (opts::shared::DumpModules) {
     const auto &Modules = DS.modules();
     for (uint32_t I = 0; I < Modules.getModuleCount(); ++I) {
       DbiModuleDescriptor MI = Modules.getModuleDescriptor(I);
@@ -219,7 +210,7 @@ Error YAMLOutputStyle::dumpDbiStream() {
 
       DMI.Mod = MI.getModuleName();
       DMI.Obj = MI.getObjFileName();
-      if (opts::pdb2yaml::DbiModuleSourceFileInfo) {
+      if (opts::shared::DumpModuleFiles) {
         auto Files = Modules.source_files(I);
         DMI.SourceFiles.assign(Files.begin(), Files.end());
       }
@@ -239,13 +230,17 @@ Error YAMLOutputStyle::dumpDbiStream() {
       auto ExpectedST = File.getStringTable();
       if (!ExpectedST)
         return ExpectedST.takeError();
-      if (opts::pdb2yaml::DbiModuleSourceLineInfo &&
+      if (!opts::shared::DumpModuleSubsections.empty() &&
           ModS.hasDebugSubsections()) {
         auto ExpectedChecksums = ModS.findChecksumsSubsection();
         if (!ExpectedChecksums)
           return ExpectedChecksums.takeError();
 
         for (const auto &SS : ModS.subsections()) {
+          opts::ModuleSubsection OptionKind = convertSubsectionKind(SS.kind());
+          if (!opts::checkModuleSubsection(OptionKind))
+            continue;
+
           auto Converted =
               CodeViewYAML::YAMLDebugSubsection::fromCodeViewSubection(
                   ExpectedST->getStringTable(), *ExpectedChecksums, SS);
@@ -255,7 +250,7 @@ Error YAMLOutputStyle::dumpDbiStream() {
         }
       }
 
-      if (opts::pdb2yaml::DbiModuleSyms) {
+      if (opts::shared::DumpModuleSyms) {
         DMI.Modi.emplace();
 
         DMI.Modi->Signature = ModS.signature();
diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.h b/tools/llvm-pdbutil/YAMLOutputStyle.h
similarity index 100%
rename from tools/llvm-pdbdump/YAMLOutputStyle.h
rename to tools/llvm-pdbutil/YAMLOutputStyle.h
diff --git a/tools/llvm-pdbdump/fuzzer/CMakeLists.txt b/tools/llvm-pdbutil/fuzzer/CMakeLists.txt
similarity index 53%
rename from tools/llvm-pdbdump/fuzzer/CMakeLists.txt
rename to tools/llvm-pdbutil/fuzzer/CMakeLists.txt
index cf5a0f70aab3..6af00476577f 100644
--- a/tools/llvm-pdbdump/fuzzer/CMakeLists.txt
+++ b/tools/llvm-pdbutil/fuzzer/CMakeLists.txt
@@ -5,11 +5,11 @@ set(LLVM_LINK_COMPONENTS
   Support
   )
 
-add_llvm_executable(llvm-pdbdump-fuzzer
+add_llvm_executable(llvm-pdbutil-fuzzer
   EXCLUDE_FROM_ALL
-  llvm-pdbdump-fuzzer.cpp
+  llvm-pdbutil-fuzzer.cpp
   )
 
-target_link_libraries(llvm-pdbdump-fuzzer
+target_link_libraries(llvm-pdbutil-fuzzer
   LLVMFuzzer
   )
diff --git a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp b/tools/llvm-pdbutil/fuzzer/llvm-pdbutil-fuzzer.cpp
similarity index 96%
rename from tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
rename to tools/llvm-pdbutil/fuzzer/llvm-pdbutil-fuzzer.cpp
index 5f09416a9ff6..4edb53e261ff 100644
--- a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
+++ b/tools/llvm-pdbutil/fuzzer/llvm-pdbutil-fuzzer.cpp
@@ -1,4 +1,4 @@
-//===-- llvm-pdbdump-fuzzer.cpp - Fuzz the llvm-pdbdump tool --------------===//
+//===-- llvm-pdbutil-fuzzer.cpp - Fuzz the llvm-pdbutil tool --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief This file implements a function that runs llvm-pdbdump
+/// \brief This file implements a function that runs llvm-pdbutil
 ///  on a single input. This function is then linked into the Fuzzer library.
 ///
 //===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp
similarity index 90%
rename from tools/llvm-pdbdump/llvm-pdbdump.cpp
rename to tools/llvm-pdbutil/llvm-pdbutil.cpp
index 4626de9c4440..f6b6a156a767 100644
--- a/tools/llvm-pdbdump/llvm-pdbdump.cpp
+++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -1,4 +1,4 @@
-//===- llvm-pdbdump.cpp - Dump debug info from a PDB file -------*- C++ -*-===//
+//===- llvm-pdbutil.cpp - Dump debug info from a PDB file -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-pdbdump.h"
+#include "llvm-pdbutil.h"
 
 #include "Analyze.h"
 #include "Diff.h"
@@ -269,7 +269,6 @@ namespace raw {
 
 cl::OptionCategory MsfOptions("MSF Container Options");
 cl::OptionCategory TypeOptions("Type Record Options");
-cl::OptionCategory FileOptions("Module & File Options");
 cl::OptionCategory SymbolOptions("Symbol Options");
 cl::OptionCategory MiscOptions("Miscellaneous Options");
 
@@ -323,20 +322,9 @@ cl::opt<bool> DumpIpiRecordBytes(
     cl::desc("dump CodeView type record raw bytes from IPI stream"),
     cl::cat(TypeOptions), cl::sub(RawSubcommand));
 
-// MODULE & FILE OPTIONS
-cl::opt<bool> DumpModules("modules", cl::desc("dump compiland information"),
-                          cl::cat(FileOptions), cl::sub(RawSubcommand));
-cl::opt<bool> DumpModuleFiles("module-files", cl::desc("dump file information"),
-                              cl::cat(FileOptions), cl::sub(RawSubcommand));
-cl::opt<bool> DumpLineInfo("line-info",
-                           cl::desc("dump file and line information"),
-                           cl::cat(FileOptions), cl::sub(RawSubcommand));
-
 // SYMBOL OPTIONS
 cl::opt<bool> DumpGlobals("globals", cl::desc("dump globals stream data"),
                           cl::cat(SymbolOptions), cl::sub(RawSubcommand));
-cl::opt<bool> DumpModuleSyms("module-syms", cl::desc("dump module symbols"),
-                             cl::cat(SymbolOptions), cl::sub(RawSubcommand));
 cl::opt<bool> DumpPublics("publics", cl::desc("dump Publics stream data"),
                           cl::cat(SymbolOptions), cl::sub(RawSubcommand));
 cl::opt<bool>
@@ -381,11 +369,9 @@ namespace pdb2yaml {
 cl::opt<bool> All("all",
                   cl::desc("Dump everything we know how to dump."),
                   cl::sub(PdbToYamlSubcommand), cl::init(false));
-cl::opt<bool>
-    NoFileHeaders("no-file-headers",
-                  cl::desc("Do not dump MSF file headers (you will not be able "
-                           "to generate a fresh PDB from the resulting YAML)"),
-                  cl::sub(PdbToYamlSubcommand), cl::init(false));
+cl::opt<bool> NoFileHeaders("no-file-headers",
+                            cl::desc("Do not dump MSF file headers"),
+                            cl::sub(PdbToYamlSubcommand), cl::init(false));
 cl::opt<bool> Minimal("minimal",
                       cl::desc("Don't write fields with default values"),
                       cl::sub(PdbToYamlSubcommand), cl::init(false));
@@ -406,29 +392,8 @@ cl::opt<bool> StringTable("string-table", cl::desc("Dump the PDB String Table"),
                           cl::sub(PdbToYamlSubcommand), cl::init(false));
 
 cl::opt<bool> DbiStream("dbi-stream",
-                        cl::desc("Dump the DBI Stream (Stream 2)"),
+                        cl::desc("Dump the DBI Stream Headers (Stream 2)"),
                         cl::sub(PdbToYamlSubcommand), cl::init(false));
-cl::opt<bool>
-    DbiModuleInfo("dbi-module-info",
-                  cl::desc("Dump DBI Module Information (implies -dbi-stream)"),
-                  cl::sub(PdbToYamlSubcommand), cl::init(false));
-
-cl::opt<bool> DbiModuleSyms(
-    "dbi-module-syms",
-    cl::desc("Dump DBI Module Information (implies -dbi-module-info)"),
-    cl::sub(PdbToYamlSubcommand), cl::init(false));
-
-cl::opt<bool> DbiModuleSourceFileInfo(
-    "dbi-module-source-info",
-    cl::desc(
-        "Dump DBI Module Source File Information (implies -dbi-module-info)"),
-    cl::sub(PdbToYamlSubcommand), cl::init(false));
-
-cl::opt<bool>
-    DbiModuleSourceLineInfo("dbi-module-lines",
-                            cl::desc("Dump DBI Module Source Line Information "
-                                     "(implies -dbi-module-source-info)"),
-                            cl::sub(PdbToYamlSubcommand), cl::init(false));
 
 cl::opt<bool> TpiStream("tpi-stream",
                         cl::desc("Dump the TPI Stream (Stream 3)"),
@@ -443,6 +408,51 @@ cl::list<std::string> InputFilename(cl::Positional,
                                     cl::sub(PdbToYamlSubcommand));
 }
 
+namespace shared {
+cl::OptionCategory FileOptions("Module & File Options");
+
+// MODULE & FILE OPTIONS (shared by RawSubcommand and PdbToYamlSubcommand)
+cl::opt<bool> DumpModules("modules", cl::desc("dump compiland information"),
+                          cl::cat(FileOptions), cl::sub(RawSubcommand),
+                          cl::sub(PdbToYamlSubcommand));
+cl::opt<bool> DumpModuleFiles("module-files", cl::desc("dump file information"),
+                              cl::cat(FileOptions), cl::sub(RawSubcommand),
+                              cl::sub(PdbToYamlSubcommand));
+cl::list<ModuleSubsection> DumpModuleSubsections(
+    "subsections", cl::ZeroOrMore, cl::CommaSeparated, // e.g. =lines,fc
+    cl::desc("dump subsections from each module's debug stream"),
+    cl::values(
+        clEnumValN(
+            ModuleSubsection::CrossScopeExports, "cme",
+            "Cross module exports (DEBUG_S_CROSSSCOPEEXPORTS subsection)"),
+        clEnumValN(
+            ModuleSubsection::CrossScopeImports, "cmi",
+            "Cross module imports (DEBUG_S_CROSSSCOPEIMPORTS subsection)"),
+        clEnumValN(ModuleSubsection::FileChecksums, "fc",
+                   "File checksums (DEBUG_S_CHECKSUMS subsection)"),
+        clEnumValN(ModuleSubsection::InlineeLines, "ilines",
+                   "Inlinee lines (DEBUG_S_INLINEELINES subsection)"),
+        clEnumValN(ModuleSubsection::Lines, "lines",
+                   "Lines (DEBUG_S_LINES subsection)"),
+        clEnumValN(ModuleSubsection::StringTable, "strings",
+                   "String Table (DEBUG_S_STRINGTABLE subsection) (not "
+                   "typically present in PDB file)"),
+        clEnumValN(ModuleSubsection::FrameData, "frames",
+                   "Frame Data (DEBUG_S_FRAMEDATA subsection)"),
+        clEnumValN(ModuleSubsection::Symbols, "symbols",
+                   "Symbols (DEBUG_S_SYMBOLS subsection) (not typically "
+                   "present in PDB file)"),
+        clEnumValN(ModuleSubsection::CoffSymbolRVAs, "rvas",
+                   "COFF Symbol RVAs (DEBUG_S_COFF_SYMBOL_RVA subsection)"),
+        clEnumValN(ModuleSubsection::Unknown, "unknown",
+                   "Any subsection not covered by another option"),
+        clEnumValN(ModuleSubsection::All, "all", "All known subsections")),
+    cl::cat(FileOptions), cl::sub(RawSubcommand), cl::sub(PdbToYamlSubcommand));
+cl::opt<bool> DumpModuleSyms("module-syms", cl::desc("dump module symbols"),
+                             cl::cat(FileOptions), cl::sub(RawSubcommand),
+                             cl::sub(PdbToYamlSubcommand));
+} // namespace shared
+
 namespace analyze {
 cl::opt<bool> StringTable("hash-collisions", cl::desc("Find hash collisions"),
                           cl::sub(AnalyzeSubcommand), cl::init(false));
@@ -463,6 +473,13 @@ cl::opt<std::string>
 
 static ExitOnError ExitOnErr;
 
+bool opts::checkModuleSubsection(opts::ModuleSubsection MS) {
+  return any_of(opts::shared::DumpModuleSubsections, // each requested value
+                [=](opts::ModuleSubsection M) { // 'All' selects every kind
+                  return M == MS || M == opts::ModuleSubsection::All;
+                });
+}
+
 static void yamlToPdb(StringRef Path) {
   BumpPtrAllocator Allocator;
   ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
@@ -540,8 +557,8 @@ static void yamlToPdb(StringRef Path) {
       }
     }
 
-    auto CodeViewSubsections =
-        ExitOnErr(CodeViewYAML::convertSubsectionList(MI.Subsections, Strings));
+    auto CodeViewSubsections = ExitOnErr(CodeViewYAML::toCodeViewSubsectionList(
+        Allocator, MI.Subsections, Strings));
     for (auto &SS : CodeViewSubsections) {
       ModiBuilder.addDebugSubsection(std::move(SS));
     }
@@ -852,7 +869,7 @@ int main(int argc_, const char *argv_[]) {
   sys::PrintStackTraceOnErrorSignal(argv_[0]);
   PrettyStackTraceProgram X(argc_, argv_);
 
-  ExitOnErr.setBanner("llvm-pdbdump: ");
+  ExitOnErr.setBanner("llvm-pdbutil: ");
 
   SmallVector<const char *, 256> argv;
   SpecificBumpPtrAllocator<char> ArgAllocator;
@@ -879,12 +896,29 @@ int main(int argc_, const char *argv_[]) {
     }
   }
 
+  if ((opts::RawSubcommand && opts::raw::RawAll) ||
+      (opts::PdbToYamlSubcommand && opts::pdb2yaml::All)) {
+    opts::shared::DumpModules = true;
+    opts::shared::DumpModuleFiles = true;
+    opts::shared::DumpModuleSyms = true;
+    opts::shared::DumpModuleSubsections.push_back(opts::ModuleSubsection::All);
+    if (llvm::is_contained(opts::shared::DumpModuleSubsections,
+                           opts::ModuleSubsection::All)) {
+      opts::shared::DumpModuleSubsections.reset();
+      opts::shared::DumpModuleSubsections.push_back(
+          opts::ModuleSubsection::All);
+    }
+  }
+
+  if (opts::shared::DumpModuleSyms || opts::shared::DumpModuleFiles)
+    opts::shared::DumpModules = true;
+
+  if (opts::shared::DumpModules)
+    opts::pdb2yaml::DbiStream = true;
+
   if (opts::RawSubcommand) {
     if (opts::raw::RawAll) {
       opts::raw::DumpHeaders = true;
-      opts::raw::DumpModules = true;
-      opts::raw::DumpModuleFiles = true;
-      opts::raw::DumpModuleSyms = true;
       opts::raw::DumpGlobals = true;
       opts::raw::DumpPublics = true;
       opts::raw::DumpSectionHeaders = true;
@@ -896,7 +930,6 @@ int main(int argc_, const char *argv_[]) {
       opts::raw::DumpIpiRecords = true;
       opts::raw::DumpSectionMap = true;
       opts::raw::DumpSectionContribs = true;
-      opts::raw::DumpLineInfo = true;
       opts::raw::DumpFpo = true;
       opts::raw::DumpStringTable = true;
     }
@@ -908,6 +941,17 @@ int main(int argc_, const char *argv_[]) {
       exit(1);
     }
   }
+  if (opts::PdbToYamlSubcommand) {
+    if (opts::pdb2yaml::All) {
+      opts::pdb2yaml::StreamMetadata = true;
+      opts::pdb2yaml::StreamDirectory = true;
+      opts::pdb2yaml::PdbStream = true;
+      opts::pdb2yaml::StringTable = true;
+      opts::pdb2yaml::DbiStream = true;
+      opts::pdb2yaml::TpiStream = true;
+      opts::pdb2yaml::IpiStream = true;
+    }
+  }
 
   llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
 
diff --git a/tools/llvm-pdbdump/llvm-pdbdump.h b/tools/llvm-pdbutil/llvm-pdbutil.h
similarity index 85%
rename from tools/llvm-pdbdump/llvm-pdbdump.h
rename to tools/llvm-pdbutil/llvm-pdbutil.h
index b344129d217a..f1699d0bb557 100644
--- a/tools/llvm-pdbdump/llvm-pdbdump.h
+++ b/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -1,4 +1,4 @@
-//===- llvm-pdbdump.h ----------------------------------------- *- C++ --*-===//
+//===- llvm-pdbutil.h ----------------------------------------- *- C++ --*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -27,6 +27,29 @@ uint32_t getTypeLength(const PDBSymbolData &Symbol);
 
 namespace opts {
 
+enum class ModuleSubsection {
+  Unknown,           // "unknown": anything not covered by another option
+  Lines,             // "lines": DEBUG_S_LINES
+  FileChecksums,     // "fc": DEBUG_S_CHECKSUMS
+  InlineeLines,      // "ilines": DEBUG_S_INLINEELINES
+  CrossScopeImports, // "cmi": DEBUG_S_CROSSSCOPEIMPORTS
+  CrossScopeExports, // "cme": DEBUG_S_CROSSSCOPEEXPORTS
+  StringTable,       // "strings": DEBUG_S_STRINGTABLE
+  Symbols,           // "symbols": DEBUG_S_SYMBOLS
+  FrameData,         // "frames": DEBUG_S_FRAMEDATA
+  CoffSymbolRVAs,    // "rvas": DEBUG_S_COFF_SYMBOL_RVA
+  All                // "all": all known subsections
+};
+
+bool checkModuleSubsection(ModuleSubsection Kind); // Kind (or All) requested?
+
+template <typename... Ts> // Variadic form: true if any listed kind matches.
+bool checkModuleSubsection(ModuleSubsection K1, ModuleSubsection K2,
+                           Ts &&... Rest) {
+  return checkModuleSubsection(K1) || // short-circuits on the first match
+         checkModuleSubsection(K2, std::forward<Ts>(Rest)...);
+}
+
 namespace pretty {
 
 enum class ClassDefinitionFormat { None, Layout, All };
@@ -96,13 +119,8 @@ extern llvm::cl::opt<bool> DumpTpiRecordBytes;
 extern llvm::cl::opt<bool> DumpTpiRecords;
 extern llvm::cl::opt<bool> DumpIpiRecords;
 extern llvm::cl::opt<bool> DumpIpiRecordBytes;
-extern llvm::cl::opt<bool> DumpModules;
-extern llvm::cl::opt<bool> DumpModuleFiles;
-extern llvm::cl::opt<bool> DumpModuleLines;
-extern llvm::cl::opt<bool> DumpModuleSyms;
 extern llvm::cl::opt<bool> DumpPublics;
 extern llvm::cl::opt<bool> DumpSectionContribs;
-extern llvm::cl::opt<bool> DumpLineInfo;
 extern llvm::cl::opt<bool> DumpSectionMap;
 extern llvm::cl::opt<bool> DumpSymRecordBytes;
 extern llvm::cl::opt<bool> DumpSectionHeaders;
@@ -123,14 +141,17 @@ extern llvm::cl::opt<bool> StreamDirectory;
 extern llvm::cl::opt<bool> StringTable;
 extern llvm::cl::opt<bool> PdbStream;
 extern llvm::cl::opt<bool> DbiStream;
-extern llvm::cl::opt<bool> DbiModuleInfo;
-extern llvm::cl::opt<bool> DbiModuleSyms;
-extern llvm::cl::opt<bool> DbiModuleSourceFileInfo;
-extern llvm::cl::opt<bool> DbiModuleSourceLineInfo;
 extern llvm::cl::opt<bool> TpiStream;
 extern llvm::cl::opt<bool> IpiStream;
 extern llvm::cl::list<std::string> InputFilename;
 }
+
+namespace shared { // Options registered with more than one subcommand.
+extern llvm::cl::opt<bool> DumpModules;                        // -modules
+extern llvm::cl::opt<bool> DumpModuleFiles;                    // -module-files
+extern llvm::cl::list<ModuleSubsection> DumpModuleSubsections; // -subsections
+extern llvm::cl::opt<bool> DumpModuleSyms;                     // -module-syms
+} // namespace shared
 }
 
 #endif
diff --git a/tools/llvm-readobj/CMakeLists.txt b/tools/llvm-readobj/CMakeLists.txt
index 0ad149538f63..bde486a5f0db 100644
--- a/tools/llvm-readobj/CMakeLists.txt
+++ b/tools/llvm-readobj/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS
   DebugInfoCodeView
   Object
+  BinaryFormat
   Support
   DebugInfoCodeView
   DebugInfoMSF
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index bc07bd296ad2..6223c09a4ded 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
@@ -45,7 +46,6 @@
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/COFF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ConvertUTF.h"
diff --git a/tools/llvm-readobj/COFFImportDumper.cpp b/tools/llvm-readobj/COFFImportDumper.cpp
index 83715e60f057..c5b8bf758462 100644
--- a/tools/llvm-readobj/COFFImportDumper.cpp
+++ b/tools/llvm-readobj/COFFImportDumper.cpp
@@ -15,9 +15,9 @@
 #include "Error.h"
 #include "ObjDumper.h"
 #include "llvm-readobj.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Object/COFFImportFile.h"
-#include "llvm/Support/COFF.h"
 
 using namespace llvm::object;
 
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 427920569042..116f02f7f154 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -1,4 +1,4 @@
-//===-- ELFDumper.cpp - ELF-specific dumper ---------------------*- C++ -*-===//
+//===- ELFDumper.cpp - ELF-specific dumper --------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,19 +17,44 @@
 #include "ObjDumper.h"
 #include "StackMapPrinter.h"
 #include "llvm-readobj.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Object/ELF.h"
 #include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/StackMapParser.h"
 #include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MipsABIFlags.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cinttypes>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <vector>
 
 using namespace llvm;
 using namespace llvm::object;
@@ -49,28 +74,28 @@ using namespace ELF;
     return std::string(#enum).substr(3);
 
 #define TYPEDEF_ELF_TYPES(ELFT)                                                \
-  typedef ELFFile<ELFT> ELFO;                                                  \
-  typedef typename ELFO::Elf_Shdr Elf_Shdr;                                    \
-  typedef typename ELFO::Elf_Sym Elf_Sym;                                      \
-  typedef typename ELFO::Elf_Dyn Elf_Dyn;                                      \
-  typedef typename ELFO::Elf_Dyn_Range Elf_Dyn_Range;                          \
-  typedef typename ELFO::Elf_Rel Elf_Rel;                                      \
-  typedef typename ELFO::Elf_Rela Elf_Rela;                                    \
-  typedef typename ELFO::Elf_Rel_Range Elf_Rel_Range;                          \
-  typedef typename ELFO::Elf_Rela_Range Elf_Rela_Range;                        \
-  typedef typename ELFO::Elf_Phdr Elf_Phdr;                                    \
-  typedef typename ELFO::Elf_Half Elf_Half;                                    \
-  typedef typename ELFO::Elf_Ehdr Elf_Ehdr;                                    \
-  typedef typename ELFO::Elf_Word Elf_Word;                                    \
-  typedef typename ELFO::Elf_Hash Elf_Hash;                                    \
-  typedef typename ELFO::Elf_GnuHash Elf_GnuHash;                              \
-  typedef typename ELFO::Elf_Sym_Range Elf_Sym_Range;                          \
-  typedef typename ELFO::Elf_Versym Elf_Versym;                                \
-  typedef typename ELFO::Elf_Verneed Elf_Verneed;                              \
-  typedef typename ELFO::Elf_Vernaux Elf_Vernaux;                              \
-  typedef typename ELFO::Elf_Verdef Elf_Verdef;                                \
-  typedef typename ELFO::Elf_Verdaux Elf_Verdaux;                              \
-  typedef typename ELFO::uintX_t uintX_t;
+  using ELFO = ELFFile<ELFT>;                                                  \
+  using Elf_Shdr = typename ELFO::Elf_Shdr;                                    \
+  using Elf_Sym = typename ELFO::Elf_Sym;                                      \
+  using Elf_Dyn = typename ELFO::Elf_Dyn;                                      \
+  using Elf_Dyn_Range = typename ELFO::Elf_Dyn_Range;                          \
+  using Elf_Rel = typename ELFO::Elf_Rel;                                      \
+  using Elf_Rela = typename ELFO::Elf_Rela;                                    \
+  using Elf_Rel_Range = typename ELFO::Elf_Rel_Range;                          \
+  using Elf_Rela_Range = typename ELFO::Elf_Rela_Range;                        \
+  using Elf_Phdr = typename ELFO::Elf_Phdr;                                    \
+  using Elf_Half = typename ELFO::Elf_Half;                                    \
+  using Elf_Ehdr = typename ELFO::Elf_Ehdr;                                    \
+  using Elf_Word = typename ELFO::Elf_Word;                                    \
+  using Elf_Hash = typename ELFO::Elf_Hash;                                    \
+  using Elf_GnuHash = typename ELFO::Elf_GnuHash;                              \
+  using Elf_Sym_Range = typename ELFO::Elf_Sym_Range;                          \
+  using Elf_Versym = typename ELFO::Elf_Versym;                                \
+  using Elf_Verneed = typename ELFO::Elf_Verneed;                              \
+  using Elf_Vernaux = typename ELFO::Elf_Vernaux;                              \
+  using Elf_Verdef = typename ELFO::Elf_Verdef;                                \
+  using Elf_Verdaux = typename ELFO::Elf_Verdaux;                              \
+  using uintX_t = typename ELFO::uintX_t;
 
 namespace {
 
@@ -81,15 +106,16 @@ template <class ELFT> class DumpStyle;
 /// the size, entity size and virtual address are different entries in arbitrary
 /// order (DT_REL, DT_RELSZ, DT_RELENT for example).
 struct DynRegionInfo {
-  DynRegionInfo() : Addr(nullptr), Size(0), EntSize(0) {}
+  DynRegionInfo() = default;
   DynRegionInfo(const void *A, uint64_t S, uint64_t ES)
       : Addr(A), Size(S), EntSize(ES) {}
+
   /// \brief Address in current address space.
-  const void *Addr;
+  const void *Addr = nullptr;
   /// \brief Size in bytes of the region.
-  uint64_t Size;
+  uint64_t Size = 0;
   /// \brief Size of each entity in the region.
-  uint64_t EntSize;
+  uint64_t EntSize = 0;
 
   template <typename Type> ArrayRef<Type> getAsArrayRef() const {
     const Type *Start = reinterpret_cast<const Type *>(Addr);
@@ -139,6 +165,7 @@ public:
 
 private:
   std::unique_ptr<DumpStyle<ELFT>> ELFDumperStyle;
+
   TYPEDEF_ELF_TYPES(ELFT)
 
   DynRegionInfo checkDRI(DynRegionInfo DRI) {
@@ -196,6 +223,7 @@ private:
         : PointerIntPair<const void *, 1>(verdef, 0) {}
     VersionMapEntry(const Elf_Vernaux *vernaux)
         : PointerIntPair<const void *, 1>(vernaux, 1) {}
+
     bool isNull() const { return getPointer() == nullptr; }
     bool isVerdef() const { return !isNull() && getInt() == 0; }
     bool isVernaux() const { return !isNull() && getInt() == 1; }
@@ -262,10 +290,11 @@ void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
 template <typename ELFT> class DumpStyle {
 public:
   using Elf_Shdr = typename ELFFile<ELFT>::Elf_Shdr;
-  using Elf_Sym =  typename ELFFile<ELFT>::Elf_Sym;
+  using Elf_Sym = typename ELFFile<ELFT>::Elf_Sym;
 
   DumpStyle(ELFDumper<ELFT> *Dumper) : Dumper(Dumper) {}
-  virtual ~DumpStyle() {}
+  virtual ~DumpStyle() = default;
+
   virtual void printFileHeaders(const ELFFile<ELFT> *Obj) = 0;
   virtual void printGroupSections(const ELFFile<ELFT> *Obj) = 0;
   virtual void printRelocations(const ELFFile<ELFT> *Obj) = 0;
@@ -274,9 +303,7 @@ public:
   virtual void printDynamicSymbols(const ELFFile<ELFT> *Obj) = 0;
   virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
   virtual void printSymtabMessage(const ELFFile<ELFT> *obj, StringRef Name,
-                                  size_t Offset) {
-    return;
-  }
+                                  size_t Offset) {}
   virtual void printSymbol(const ELFFile<ELFT> *Obj, const Elf_Sym *Symbol,
                            const Elf_Sym *FirstSym, StringRef StrTable,
                            bool IsDynamic) = 0;
@@ -284,16 +311,20 @@ public:
   virtual void printHashHistogram(const ELFFile<ELFT> *Obj) = 0;
   virtual void printNotes(const ELFFile<ELFT> *Obj) = 0;
   const ELFDumper<ELFT> *dumper() const { return Dumper; }
+
 private:
   const ELFDumper<ELFT> *Dumper;
 };
 
 template <typename ELFT> class GNUStyle : public DumpStyle<ELFT> {
   formatted_raw_ostream OS;
+
 public:
   TYPEDEF_ELF_TYPES(ELFT)
+
   GNUStyle(ScopedPrinter &W, ELFDumper<ELFT> *Dumper)
       : DumpStyle<ELFT>(Dumper), OS(W.getOStream()) {}
+
   void printFileHeaders(const ELFO *Obj) override;
   void printGroupSections(const ELFFile<ELFT> *Obj) override;
   void printRelocations(const ELFO *Obj) override;
@@ -301,8 +332,8 @@ public:
   void printSymbols(const ELFO *Obj) override;
   void printDynamicSymbols(const ELFO *Obj) override;
   void printDynamicRelocations(const ELFO *Obj) override;
-  virtual void printSymtabMessage(const ELFO *Obj, StringRef Name,
-                                  size_t Offset) override;
+  void printSymtabMessage(const ELFO *Obj, StringRef Name,
+                          size_t Offset) override;
   void printProgramHeaders(const ELFO *Obj) override;
   void printHashHistogram(const ELFFile<ELFT> *Obj) override;
   void printNotes(const ELFFile<ELFT> *Obj) override;
@@ -311,6 +342,7 @@ private:
   struct Field {
     StringRef Str;
     unsigned Column;
+
     Field(StringRef S, unsigned Col) : Str(S), Column(Col) {}
     Field(unsigned Col) : Str(""), Column(Col) {}
   };
@@ -348,6 +380,7 @@ private:
 template <typename ELFT> class LLVMStyle : public DumpStyle<ELFT> {
 public:
   TYPEDEF_ELF_TYPES(ELFT)
+
   LLVMStyle(ScopedPrinter &W, ELFDumper<ELFT> *Dumper)
       : DumpStyle<ELFT>(Dumper), W(W) {}
 
@@ -368,10 +401,11 @@ private:
   void printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel);
   void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
                    StringRef StrTable, bool IsDynamic) override;
+
   ScopedPrinter &W;
 };
 
-} // namespace
+} // end anonymous namespace
 
 namespace llvm {
 
@@ -405,7 +439,7 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
   return readobj_error::unsupported_obj_file_format;
 }
 
-} // namespace llvm
+} // end namespace llvm
 
 // Iterate through the versions needed section, and place each Elf_Vernaux
 // in the VersionMap according to its index.
@@ -525,8 +559,8 @@ static void printVersionDefinitionSection(ELFDumper<ELFT> *Dumper,
                                           const ELFO *Obj,
                                           const typename ELFO::Elf_Shdr *Sec,
                                           ScopedPrinter &W) {
-  typedef typename ELFO::Elf_Verdef VerDef;
-  typedef typename ELFO::Elf_Verdaux VerdAux;
+  using VerDef = typename ELFO::Elf_Verdef;
+  using VerdAux = typename ELFO::Elf_Verdaux;
 
   DictScope SD(W, "SHT_GNU_verdef");
   if (!Sec)
@@ -581,8 +615,8 @@ static void printVersionDependencySection(ELFDumper<ELFT> *Dumper,
                                           const ELFO *Obj,
                                           const typename ELFO::Elf_Shdr *Sec,
                                           ScopedPrinter &W) {
-  typedef typename ELFO::Elf_Verneed VerNeed;
-  typedef typename ELFO::Elf_Vernaux VernAux;
+  using VerNeed = typename ELFO::Elf_Verneed;
+  using VernAux = typename ELFO::Elf_Vernaux;
 
   DictScope SD(W, "SHT_GNU_verneed");
   if (!Sec)
@@ -978,9 +1012,7 @@ static const EnumEntry<unsigned> ElfSymbolTypes[] = {
     {"GNU_IFunc", "IFUNC",   ELF::STT_GNU_IFUNC}};
 
 static const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
-  { "AMDGPU_HSA_KERNEL",            ELF::STT_AMDGPU_HSA_KERNEL },
-  { "AMDGPU_HSA_INDIRECT_FUNCTION", ELF::STT_AMDGPU_HSA_INDIRECT_FUNCTION },
-  { "AMDGPU_HSA_METADATA",          ELF::STT_AMDGPU_HSA_METADATA }
+  { "AMDGPU_HSA_KERNEL",            ELF::STT_AMDGPU_HSA_KERNEL }
 };
 
 static const char *getGroupType(uint32_t Flag) {
@@ -1012,13 +1044,6 @@ static const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
   LLVM_READOBJ_ENUM_ENT(ELF, XCORE_SHF_DP_SECTION)
 };
 
-static const EnumEntry<unsigned> ElfAMDGPUSectionFlags[] = {
-  LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_GLOBAL),
-  LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_READONLY),
-  LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_CODE),
-  LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_AGENT)
-};
-
 static const EnumEntry<unsigned> ElfARMSectionFlags[] = {
   LLVM_READOBJ_ENUM_ENT(ELF, SHF_ARM_PURECODE)
 };
@@ -1077,13 +1102,6 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
   // Check potentially overlapped processor-specific
   // program header type.
   switch (Arch) {
-  case ELF::EM_AMDGPU:
-    switch (Type) {
-    LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM);
-    LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT);
-    LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_READONLY_AGENT);
-    LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_CODE_AGENT);
-    }
   case ELF::EM_ARM:
     switch (Type) {
     LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX);
@@ -1139,14 +1157,6 @@ static std::string getElfPtType(unsigned Arch, unsigned Type) {
   default:
     // All machine specific PT_* types
     switch (Arch) {
-    case ELF::EM_AMDGPU:
-      switch (Type) {
-        LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM);
-        LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT);
-        LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_READONLY_AGENT);
-        LLVM_READOBJ_ENUM_CASE(ELF, PT_AMDGPU_HSA_LOAD_CODE_AGENT);
-      }
-      return "";
     case ELF::EM_ARM:
       if (Type == ELF::PT_ARM_EXIDX)
         return "EXIDX";
@@ -1262,7 +1272,6 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
 template <typename ELFT>
 ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer)
     : ObjDumper(Writer), Obj(Obj) {
-
   SmallVector<const Elf_Phdr *, 4> LoadSegments;
   for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
     if (Phdr.p_type == ELF::PT_DYNAMIC) {
@@ -1591,8 +1600,8 @@ static const EnumEntry<unsigned> ElfDynamicDTMipsFlags[] = {
 
 template <typename T, typename TFlag>
 void printFlags(T Value, ArrayRef<EnumEntry<TFlag>> Flags, raw_ostream &OS) {
-  typedef EnumEntry<TFlag> FlagEntry;
-  typedef SmallVector<FlagEntry, 10> FlagVector;
+  using FlagEntry = EnumEntry<TFlag>;
+  using FlagVector = SmallVector<FlagEntry, 10>;
   FlagVector SetFlags;
 
   for (const auto &Flag : Flags) {
@@ -1711,6 +1720,7 @@ void ELFDumper<ELFT>::printUnwindInfo() {
 }
 
 namespace {
+
 template <> void ELFDumper<ELFType<support::little, false>>::printUnwindInfo() {
   const unsigned Machine = Obj->getHeader()->e_machine;
   if (Machine == EM_ARM) {
@@ -1720,7 +1730,8 @@ template <> void ELFDumper<ELFType<support::little, false>>::printUnwindInfo() {
   }
   W.startLine() << "UnwindInfo not implemented.\n";
 }
-}
+
+} // end anonymous namespace
 
 template<class ELFT>
 void ELFDumper<ELFT>::printDynamicTable() {
@@ -1766,7 +1777,7 @@ template<class ELFT>
 void ELFDumper<ELFT>::printNeededLibraries() {
   ListScope D(W, "NeededLibraries");
 
-  typedef std::vector<StringRef> LibsTy;
+  using LibsTy = std::vector<StringRef>;
   LibsTy Libs;
 
   for (const auto &Entry : dynamic_table())
@@ -1820,6 +1831,7 @@ void ELFDumper<ELFT>::printAttributes() {
 }
 
 namespace {
+
 template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() {
   if (Obj->getHeader()->e_machine != EM_ARM) {
     W.startLine() << "Attributes not implemented.\n";
@@ -1845,13 +1857,12 @@ template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() {
     ARMAttributeParser(&W).Parse(Contents, true);
   }
 }
-}
 
-namespace {
 template <class ELFT> class MipsGOTParser {
 public:
   TYPEDEF_ELF_TYPES(ELFT)
-  typedef typename ELFO::Elf_Addr GOTEntry;
+  using GOTEntry = typename ELFO::Elf_Addr;
+
   MipsGOTParser(ELFDumper<ELFT> *Dumper, const ELFO *Obj,
                 Elf_Dyn_Range DynTable, ScopedPrinter &W);
 
@@ -1862,11 +1873,11 @@ private:
   ELFDumper<ELFT> *Dumper;
   const ELFO *Obj;
   ScopedPrinter &W;
-  llvm::Optional<uint64_t> DtPltGot;
-  llvm::Optional<uint64_t> DtLocalGotNum;
-  llvm::Optional<uint64_t> DtGotSym;
-  llvm::Optional<uint64_t> DtMipsPltGot;
-  llvm::Optional<uint64_t> DtJmpRel;
+  Optional<uint64_t> DtPltGot;
+  Optional<uint64_t> DtLocalGotNum;
+  Optional<uint64_t> DtGotSym;
+  Optional<uint64_t> DtMipsPltGot;
+  Optional<uint64_t> DtJmpRel;
 
   std::size_t getGOTTotal(ArrayRef<uint8_t> GOT) const;
   const GOTEntry *makeGOTIter(ArrayRef<uint8_t> GOT, std::size_t EntryNum);
@@ -1882,7 +1893,8 @@ private:
                      const GOTEntry *It, StringRef StrTable,
                      const Elf_Sym *Sym);
 };
-}
+
+} // end anonymous namespace
 
 template <class ELFT>
 MipsGOTParser<ELFT>::MipsGOTParser(ELFDumper<ELFT> *Dumper, const ELFO *Obj,
@@ -2353,8 +2365,8 @@ template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
   ArrayRef<uint8_t> StackMapContentsArray =
       unwrapOrError(Obj->getSectionContents(StackMapSection));
 
-  prettyPrintStackMap(llvm::outs(), StackMapV2Parser<ELFT::TargetEndianness>(
-                                        StackMapContentsArray));
+  prettyPrintStackMap(outs(), StackMapV2Parser<ELFT::TargetEndianness>(
+                                  StackMapContentsArray));
 }
 
 template <class ELFT> void ELFDumper<ELFT>::printGroupSections() {
@@ -2455,7 +2467,7 @@ template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
 template <class ELFT>
 void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
                                      const Elf_Rela &R, bool IsRela) {
-  std::string Offset, Info, Addend = "", Value;
+  std::string Offset, Info, Addend, Value;
   SmallString<32> RelocName;
   StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab));
   StringRef TargetName;
@@ -2549,6 +2561,7 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
 
 std::string getSectionTypeString(unsigned Arch, unsigned Type) {
   using namespace ELF;
+
   switch (Arch) {
   case EM_ARM:
     switch (Type) {
@@ -2715,7 +2728,7 @@ template <class ELFT> void GNUStyle<ELFT>::printSections(const ELFO *Obj) {
 template <class ELFT>
 void GNUStyle<ELFT>::printSymtabMessage(const ELFO *Obj, StringRef Name,
                                         size_t Entries) {
-  if (Name.size())
+  if (!Name.empty())
     OS << "\nSymbol table '" << Name << "' contains " << Entries
        << " entries:\n";
   else
@@ -2870,7 +2883,7 @@ template <class ELFT> void GNUStyle<ELFT>::printSymbols(const ELFO *Obj) {
 
 template <class ELFT>
 void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
-  if (this->dumper()->getDynamicStringTable().size() == 0)
+  if (this->dumper()->getDynamicStringTable().empty())
     return;
   auto StringTable = this->dumper()->getDynamicStringTable();
   auto DynSyms = this->dumper()->dynamic_symbols();
@@ -3084,19 +3097,19 @@ void GNUStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela R,
   Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
   SymbolName =
       unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()));
-  std::string Addend = "", Info, Offset, Value;
+  std::string Addend, Info, Offset, Value;
   Offset = to_string(format_hex_no_prefix(R.r_offset, Width));
   Info = to_string(format_hex_no_prefix(R.r_info, Width));
   Value = to_string(format_hex_no_prefix(Sym->getValue(), Width));
   int64_t RelAddend = R.r_addend;
-  if (SymbolName.size() && IsRela) {
+  if (!SymbolName.empty() && IsRela) {
     if (R.r_addend < 0)
       Addend = " - ";
     else
       Addend = " + ";
   }
 
-  if (!SymbolName.size() && Sym->getValue() == 0)
+  if (SymbolName.empty() && Sym->getValue() == 0)
     Value = "";
 
   if (IsRela)
@@ -3231,7 +3244,7 @@ void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
     size_t MaxChain = 1;
     size_t CumulativeNonZero = 0;
 
-    if (Chains.size() == 0 || NBucket == 0)
+    if (Chains.empty() || NBucket == 0)
       return;
 
     std::vector<size_t> ChainLen(NBucket, 0);
@@ -3562,13 +3575,13 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
     DictScope Group(W, "Relocation");
     W.printHex("Offset", Rel.r_offset);
     W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL()));
-    W.printNumber("Symbol", TargetName.size() > 0 ? TargetName : "-",
+    W.printNumber("Symbol", !TargetName.empty() ? TargetName : "-",
                   Rel.getSymbol(Obj->isMips64EL()));
     W.printHex("Addend", Rel.r_addend);
   } else {
     raw_ostream &OS = W.startLine();
     OS << W.hex(Rel.r_offset) << " " << RelocName << " "
-       << (TargetName.size() > 0 ? TargetName : "-") << " "
+       << (!TargetName.empty() ? TargetName : "-") << " "
        << W.hex(Rel.r_addend) << "\n";
   }
 }
@@ -3592,10 +3605,6 @@ template <class ELFT> void LLVMStyle<ELFT>::printSections(const ELFO *Obj) {
     std::vector<EnumEntry<unsigned>> SectionFlags(std::begin(ElfSectionFlags),
                                                   std::end(ElfSectionFlags));
     switch (Obj->getHeader()->e_machine) {
-    case EM_AMDGPU:
-      SectionFlags.insert(SectionFlags.end(), std::begin(ElfAMDGPUSectionFlags),
-                          std::end(ElfAMDGPUSectionFlags));
-      break;
     case EM_ARM:
       SectionFlags.insert(SectionFlags.end(), std::begin(ElfARMSectionFlags),
                           std::end(ElfARMSectionFlags));
@@ -3763,12 +3772,12 @@ void LLVMStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) {
     DictScope Group(W, "Relocation");
     W.printHex("Offset", Rel.r_offset);
     W.printNumber("Type", RelocName, (int)Rel.getType(Obj->isMips64EL()));
-    W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+    W.printString("Symbol", !SymbolName.empty() ? SymbolName : "-");
     W.printHex("Addend", Rel.r_addend);
   } else {
     raw_ostream &OS = W.startLine();
     OS << W.hex(Rel.r_offset) << " " << RelocName << " "
-       << (SymbolName.size() > 0 ? SymbolName : "-") << " "
+       << (!SymbolName.empty() ? SymbolName : "-") << " "
        << W.hex(Rel.r_addend) << "\n";
   }
 }
@@ -3801,4 +3810,3 @@ template <class ELFT>
 void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
   W.startLine() << "printNotes not implemented!\n";
 }
-
diff --git a/tools/llvm-readobj/LLVMBuild.txt b/tools/llvm-readobj/LLVMBuild.txt
index c0ed38e18d0c..87ede1008cb3 100644
--- a/tools/llvm-readobj/LLVMBuild.txt
+++ b/tools/llvm-readobj/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Tool
 name = llvm-readobj
 parent = Tools
-required_libraries = all-targets BitReader Object DebugInfoCodeView DebugInfoPDB DebugInfoMSF
+required_libraries = all-targets BitReader Object BinaryFormat DebugInfoCodeView DebugInfoPDB DebugInfoMSF
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index 74b7735f8cd1..f1cdc5fa1056 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -382,6 +382,7 @@ struct ConstModifier: public Modifier {
       switch (Ran->Rand() % 2) {
       case 0: if (Ty->getScalarType()->isIntegerTy())
                 return PT->push_back(ConstantVector::getAllOnesValue(Ty));
+              llvm_unreachable("Unexpected state");
       case 1: if (Ty->getScalarType()->isIntegerTy())
                 return PT->push_back(ConstantVector::getNullValue(Ty));
       }
@@ -407,9 +408,11 @@ struct ConstModifier: public Modifier {
       case 0: if (Ty->isIntegerTy())
                 return PT->push_back(ConstantInt::get(Ty,
                   APInt::getAllOnesValue(Ty->getPrimitiveSizeInBits())));
+              llvm_unreachable("Unexpected state");
       case 1: if (Ty->isIntegerTy())
                 return PT->push_back(ConstantInt::get(Ty,
                   APInt::getNullValue(Ty->getPrimitiveSizeInBits())));
+              llvm_unreachable("Unexpected state");
       case 2: case 3: case 4: case 5:
       case 6: if (Ty->isIntegerTy())
                 PT->push_back(ConstantInt::get(Ty, Ran->Rand()));
diff --git a/tools/llvm-xray/xray-extract.cc b/tools/llvm-xray/xray-extract.cc
index d7015a05b0f2..6b72b81ab814 100644
--- a/tools/llvm-xray/xray-extract.cc
+++ b/tools/llvm-xray/xray-extract.cc
@@ -18,11 +18,11 @@
 
 #include "func-id-helper.h"
 #include "xray-registry.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Object/ELF.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
diff --git a/tools/obj2yaml/dwarf2yaml.cpp b/tools/obj2yaml/dwarf2yaml.cpp
index d41b44c06810..d97eda30c039 100644
--- a/tools/obj2yaml/dwarf2yaml.cpp
+++ b/tools/obj2yaml/dwarf2yaml.cpp
@@ -203,6 +203,7 @@ void dumpDebugInfo(DWARFContextInMemory &DCtx, DWARFYAML::Data &Y) {
             case dwarf::DW_FORM_line_strp:
             case dwarf::DW_FORM_strp_sup:
             case dwarf::DW_FORM_GNU_str_index:
+            case dwarf::DW_FORM_strx:
               if (auto Val = FormValue.getValue().getAsCStringOffset())
                 NewValue.Value = Val.getValue();
               break;
diff --git a/tools/obj2yaml/macho2yaml.cpp b/tools/obj2yaml/macho2yaml.cpp
index f7b6c4748d5e..a1d107dc5afb 100644
--- a/tools/obj2yaml/macho2yaml.cpp
+++ b/tools/obj2yaml/macho2yaml.cpp
@@ -216,7 +216,7 @@ void MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) {
         MachO::swapStruct(LC.Data.load_command_data);
       EndPtr = processLoadCommandData<MachO::load_command>(LC, LoadCmd);
       break;
-#include "llvm/Support/MachO.def"
+#include "llvm/BinaryFormat/MachO.def"
     }
     auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr);
     if (!std::all_of(EndPtr, &EndPtr[RemainingBytes],
diff --git a/tools/yaml2obj/yaml2elf.cpp b/tools/yaml2obj/yaml2elf.cpp
index 8fd2bfd16726..c89f768ed6ff 100644
--- a/tools/yaml2obj/yaml2elf.cpp
+++ b/tools/yaml2obj/yaml2elf.cpp
@@ -14,10 +14,10 @@
 
 #include "yaml2obj.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/ObjectYAML/ELFYAML.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/tools/yaml2obj/yaml2macho.cpp b/tools/yaml2obj/yaml2macho.cpp
index 92b736e5298e..34b6ac2029fc 100644
--- a/tools/yaml2obj/yaml2macho.cpp
+++ b/tools/yaml2obj/yaml2macho.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "yaml2obj.h"
-#include "llvm/ObjectYAML/ObjectYAML.h"
+#include "llvm/BinaryFormat/MachO.h"
 #include "llvm/ObjectYAML/DWARFEmitter.h"
+#include "llvm/ObjectYAML/ObjectYAML.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -239,7 +239,7 @@ Error MachOWriter::writeLoadCommands(raw_ostream &OS) {
       BytesWritten +=
           writeLoadCommandData<MachO::load_command>(LC, OS, Obj.IsLittleEndian);
       break;
-#include "llvm/Support/MachO.def"
+#include "llvm/BinaryFormat/MachO.def"
     }
 
     if (LC.PayloadBytes.size() > 0) {
diff --git a/unittests/ADT/DAGDeltaAlgorithmTest.cpp b/unittests/ADT/DAGDeltaAlgorithmTest.cpp
index 190df7f57470..030fadbc9a38 100644
--- a/unittests/ADT/DAGDeltaAlgorithmTest.cpp
+++ b/unittests/ADT/DAGDeltaAlgorithmTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/DAGDeltaAlgorithm.h"
+#include "gtest/gtest.h"
 #include <algorithm>
 #include <cstdarg>
 using namespace llvm;
diff --git a/unittests/ADT/DeltaAlgorithmTest.cpp b/unittests/ADT/DeltaAlgorithmTest.cpp
index bed57b1a1725..01dc1f36bd2f 100644
--- a/unittests/ADT/DeltaAlgorithmTest.cpp
+++ b/unittests/ADT/DeltaAlgorithmTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/DeltaAlgorithm.h"
+#include "gtest/gtest.h"
 #include <algorithm>
 #include <cstdarg>
 using namespace llvm;
diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp
index 273f4da021c4..cb5ba6875eaa 100644
--- a/unittests/ADT/DenseMapTest.cpp
+++ b/unittests/ADT/DenseMapTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/DenseMap.h"
+#include "gtest/gtest.h"
 #include <map>
 #include <set>
 
diff --git a/unittests/ADT/DepthFirstIteratorTest.cpp b/unittests/ADT/DepthFirstIteratorTest.cpp
index 463d6928bd5c..4169cd48fcbd 100644
--- a/unittests/ADT/DepthFirstIteratorTest.cpp
+++ b/unittests/ADT/DepthFirstIteratorTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "TestGraph.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "TestGraph.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/ADT/FoldingSet.cpp b/unittests/ADT/FoldingSet.cpp
index 696463881195..f5b1b71beb63 100644
--- a/unittests/ADT/FoldingSet.cpp
+++ b/unittests/ADT/FoldingSet.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "gtest/gtest.h"
 #include <string>
 
 using namespace llvm;
diff --git a/unittests/ADT/HashingTest.cpp b/unittests/ADT/HashingTest.cpp
index b28561bd0115..367f58dc7a6a 100644
--- a/unittests/ADT/HashingTest.cpp
+++ b/unittests/ADT/HashingTest.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Support/DataTypes.h"
+#include "gtest/gtest.h"
 #include <deque>
 #include <list>
 #include <map>
diff --git a/unittests/ADT/ImmutableMapTest.cpp b/unittests/ADT/ImmutableMapTest.cpp
index 6a99884bfbb3..23ca168a7f7c 100644
--- a/unittests/ADT/ImmutableMapTest.cpp
+++ b/unittests/ADT/ImmutableMapTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/ImmutableMap.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/ADT/ImmutableSetTest.cpp b/unittests/ADT/ImmutableSetTest.cpp
index a6eb405db720..35ac2c19b2f5 100644
--- a/unittests/ADT/ImmutableSetTest.cpp
+++ b/unittests/ADT/ImmutableSetTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/ImmutableSet.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/ADT/IteratorTest.cpp b/unittests/ADT/IteratorTest.cpp
index 7f261824b499..c95ce8061847 100644
--- a/unittests/ADT/IteratorTest.cpp
+++ b/unittests/ADT/IteratorTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/iterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/ADT/MapVectorTest.cpp b/unittests/ADT/MapVectorTest.cpp
index f5b094fb627b..bd6602b030f6 100644
--- a/unittests/ADT/MapVectorTest.cpp
+++ b/unittests/ADT/MapVectorTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/iterator_range.h"
+#include "gtest/gtest.h"
 #include <utility>
 
 using namespace llvm;
diff --git a/unittests/ADT/OptionalTest.cpp b/unittests/ADT/OptionalTest.cpp
index 4c0c99393d21..46d4fe0780c3 100644
--- a/unittests/ADT/OptionalTest.cpp
+++ b/unittests/ADT/OptionalTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/Optional.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/ADT/PointerEmbeddedIntTest.cpp b/unittests/ADT/PointerEmbeddedIntTest.cpp
index 9c27f8ee655f..695ea12e0b03 100644
--- a/unittests/ADT/PointerEmbeddedIntTest.cpp
+++ b/unittests/ADT/PointerEmbeddedIntTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/PointerEmbeddedInt.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/ADT/PointerIntPairTest.cpp b/unittests/ADT/PointerIntPairTest.cpp
index 13680c78b9bb..985fdba0247f 100644
--- a/unittests/ADT/PointerIntPairTest.cpp
+++ b/unittests/ADT/PointerIntPairTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "gtest/gtest.h"
 #include <limits>
 using namespace llvm;
 
diff --git a/unittests/ADT/PointerSumTypeTest.cpp b/unittests/ADT/PointerSumTypeTest.cpp
index 75c88f7fee9f..a4faea624f1c 100644
--- a/unittests/ADT/PointerSumTypeTest.cpp
+++ b/unittests/ADT/PointerSumTypeTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/PointerSumType.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/ADT/PointerUnionTest.cpp b/unittests/ADT/PointerUnionTest.cpp
index a592784ae095..360c3714841b 100644
--- a/unittests/ADT/PointerUnionTest.cpp
+++ b/unittests/ADT/PointerUnionTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/ADT/PostOrderIteratorTest.cpp b/unittests/ADT/PostOrderIteratorTest.cpp
index 17b8c4d842d3..20c938e89326 100644
--- a/unittests/ADT/PostOrderIteratorTest.cpp
+++ b/unittests/ADT/PostOrderIteratorTest.cpp
@@ -6,10 +6,10 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-#include "gtest/gtest.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/ADT/RangeAdapterTest.cpp b/unittests/ADT/RangeAdapterTest.cpp
index 4c7bef53235b..edc1ced72ba9 100644
--- a/unittests/ADT/RangeAdapterTest.cpp
+++ b/unittests/ADT/RangeAdapterTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
 #include "gtest/gtest.h"
 
 #include <iterator>
diff --git a/unittests/ADT/ReverseIterationTest.cpp b/unittests/ADT/ReverseIterationTest.cpp
index a1fd3b26d4e3..1e2dedf083f7 100644
--- a/unittests/ADT/ReverseIterationTest.cpp
+++ b/unittests/ADT/ReverseIterationTest.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "gtest/gtest.h"
 
 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
 using namespace llvm;
diff --git a/unittests/ADT/SCCIteratorTest.cpp b/unittests/ADT/SCCIteratorTest.cpp
index f596ea6d6b88..ca6d84ed2f32 100644
--- a/unittests/ADT/SCCIteratorTest.cpp
+++ b/unittests/ADT/SCCIteratorTest.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SCCIterator.h"
-#include "gtest/gtest.h"
 #include "TestGraph.h"
+#include "gtest/gtest.h"
 #include <limits.h>
 
 using namespace llvm;
diff --git a/unittests/ADT/SmallPtrSetTest.cpp b/unittests/ADT/SmallPtrSetTest.cpp
index fc14c684d67f..0070d1cbae1b 100644
--- a/unittests/ADT/SmallPtrSetTest.cpp
+++ b/unittests/ADT/SmallPtrSetTest.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
-#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index ca6391024f27..5903ce8c08eb 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Compiler.h"
 #include "gtest/gtest.h"
 #include <list>
@@ -209,6 +209,22 @@ typedef ::testing::Types<SmallVector<Constructable, 0>,
                          > SmallVectorTestTypes;
 TYPED_TEST_CASE(SmallVectorTest, SmallVectorTestTypes);
 
+// Constructor test: (count, value) form with integer arguments.
+TYPED_TEST(SmallVectorTest, ConstructorNonIterTest) {
+  SCOPED_TRACE("ConstructorTest");
+  this->theVector = SmallVector<Constructable, 2>(2, 2);
+  this->assertValuesInOrder(this->theVector, 2u, 2, 2);
+}
+
+// Constructor test: iterator-range form.
+TYPED_TEST(SmallVectorTest, ConstructorIterTest) {
+  SCOPED_TRACE("ConstructorTest");
+  int arr[] = {1, 2, 3};
+  this->theVector =
+      SmallVector<Constructable, 4>(std::begin(arr), std::end(arr));
+  this->assertValuesInOrder(this->theVector, 3u, 1, 2, 3);
+}
+
 // New vector test.
 TYPED_TEST(SmallVectorTest, EmptyVectorTest) {
   SCOPED_TRACE("EmptyVectorTest");
@@ -415,6 +431,33 @@ TYPED_TEST(SmallVectorTest, AppendRepeatedTest) {
   this->assertValuesInOrder(this->theVector, 3u, 1, 77, 77);
 }
 
+// Append test: (count, value) form with integer arguments.
+TYPED_TEST(SmallVectorTest, AppendNonIterTest) {
+  SCOPED_TRACE("AppendRepeatedTest");
+
+  this->theVector.push_back(Constructable(1));
+  this->theVector.append(2, 7);
+  this->assertValuesInOrder(this->theVector, 3u, 1, 7, 7);
+}
+
+TYPED_TEST(SmallVectorTest, AppendRepeatedNonForwardIterator) {
+  SCOPED_TRACE("AppendRepeatedTest");
+
+  struct output_iterator {
+    typedef std::output_iterator_tag iterator_category;
+    typedef int value_type;
+    typedef int difference_type;
+    typedef value_type *pointer;
+    typedef value_type &reference;
+    operator int() { return 2; }
+    operator Constructable() { return 7; }
+  };
+
+  this->theVector.push_back(Constructable(1));
+  this->theVector.append(output_iterator(), output_iterator());
+  this->assertValuesInOrder(this->theVector, 3u, 1, 7, 7);
+}
+
 // Assign test
 TYPED_TEST(SmallVectorTest, AssignTest) {
   SCOPED_TRACE("AssignTest");
@@ -434,6 +477,15 @@ TYPED_TEST(SmallVectorTest, AssignRangeTest) {
   this->assertValuesInOrder(this->theVector, 3u, 1, 2, 3);
 }
 
+// Assign test: (count, value) form with integer arguments.
+TYPED_TEST(SmallVectorTest, AssignNonIterTest) {
+  SCOPED_TRACE("AssignTest");
+
+  this->theVector.push_back(Constructable(1));
+  this->theVector.assign(2, 7);
+  this->assertValuesInOrder(this->theVector, 2u, 7, 7);
+}
+
 // Move-assign test
 TYPED_TEST(SmallVectorTest, MoveAssignTest) {
   SCOPED_TRACE("MoveAssignTest");
@@ -532,6 +584,15 @@ TYPED_TEST(SmallVectorTest, InsertRepeatedTest) {
   this->assertValuesInOrder(this->theVector, 6u, 1, 16, 16, 2, 3, 4);
 }
 
+TYPED_TEST(SmallVectorTest, InsertRepeatedNonIterTest) {
+  SCOPED_TRACE("InsertRepeatedTest");
+
+  this->makeSequence(this->theVector, 1, 4);
+  Constructable::reset();
+  auto I = this->theVector.insert(this->theVector.begin() + 1, 2, 7);
+  EXPECT_EQ(this->theVector.begin() + 1, I);
+  this->assertValuesInOrder(this->theVector, 6u, 1, 7, 7, 2, 3, 4);
+}
 
 TYPED_TEST(SmallVectorTest, InsertRepeatedAtEndTest) {
   SCOPED_TRACE("InsertRepeatedTest");
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 6d25526e5ad1..af4592ba0957 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/Triple.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
@@ -1090,11 +1090,16 @@ TEST(TripleTest, NormalizeARM) {
   EXPECT_EQ("armv6eb--netbsd-eabihf", Triple::normalize("armv6eb-netbsd-eabihf"));
   EXPECT_EQ("armv7eb--netbsd-eabihf", Triple::normalize("armv7eb-netbsd-eabihf"));
 
+  EXPECT_EQ("armv7-suse-linux-gnueabihf",
+            Triple::normalize("armv7-suse-linux-gnueabi"));
+
   Triple T;
   T = Triple("armv6--netbsd-eabi");
   EXPECT_EQ(Triple::arm, T.getArch());
   T = Triple("armv6eb--netbsd-eabi");
   EXPECT_EQ(Triple::armeb, T.getArch());
+  T = Triple("armv7-suse-linux-gnueabihf");
+  EXPECT_EQ(Triple::GNUEABIHF, T.getEnvironment());
 }
 
 TEST(TripleTest, ParseARMArch) {
diff --git a/unittests/ADT/TwineTest.cpp b/unittests/ADT/TwineTest.cpp
index 0b7e88dee500..950eda2b058a 100644
--- a/unittests/ADT/TwineTest.cpp
+++ b/unittests/ADT/TwineTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/FormatAdapters.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/unittests/ADT/VariadicFunctionTest.cpp b/unittests/ADT/VariadicFunctionTest.cpp
index cde31205966c..43db64836cc1 100644
--- a/unittests/ADT/VariadicFunctionTest.cpp
+++ b/unittests/ADT/VariadicFunctionTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/VariadicFunction.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 namespace {
diff --git a/unittests/Analysis/BranchProbabilityInfoTest.cpp b/unittests/Analysis/BranchProbabilityInfoTest.cpp
index cbf8b50c7623..529af5c068da 100644
--- a/unittests/Analysis/BranchProbabilityInfoTest.cpp
+++ b/unittests/Analysis/BranchProbabilityInfoTest.cpp
@@ -13,8 +13,8 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/DataTypes.h"
diff --git a/unittests/Analysis/CFGTest.cpp b/unittests/Analysis/CFGTest.cpp
index c60044fa52df..d7f14c3e1c35 100644
--- a/unittests/Analysis/CFGTest.cpp
+++ b/unittests/Analysis/CFGTest.cpp
@@ -14,9 +14,9 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt
index 8082c54b9c66..ac8bca25d93a 100644
--- a/unittests/Analysis/CMakeLists.txt
+++ b/unittests/Analysis/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_unittest(AnalysisTests
   CallGraphTest.cpp
   CFGTest.cpp
   CGSCCPassManagerTest.cpp
+  GlobalsModRefTest.cpp
   LazyCallGraphTest.cpp
   LoopInfoTest.cpp
   MemoryBuiltinsTest.cpp
diff --git a/unittests/Analysis/GlobalsModRefTest.cpp b/unittests/Analysis/GlobalsModRefTest.cpp
new file mode 100644
index 000000000000..323edc2cc175
--- /dev/null
+++ b/unittests/Analysis/GlobalsModRefTest.cpp
@@ -0,0 +1,55 @@
+//===--- GlobalsModRefTest.cpp - GlobalsModRef unit tests -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+TEST(GlobalsModRef, OptNone) {
+  StringRef Assembly = R"(
+    define void @f1() optnone {
+      ret void
+    }
+    define void @f2() optnone readnone {
+      ret void
+    }
+    define void @f3() optnone readonly {
+      ret void
+    }
+  )";
+
+  LLVMContext Context;
+  SMDiagnostic Error;
+  auto M = parseAssemblyString(Assembly, Error, Context);
+  ASSERT_TRUE(M) << "Bad assembly?";
+
+  const auto &funcs = M->functions();
+  auto I = funcs.begin();
+  ASSERT_NE(I, funcs.end());
+  const Function &F1 = *I;
+  ASSERT_NE(++I, funcs.end());
+  const Function &F2 = *I;
+  ASSERT_NE(++I, funcs.end());
+  const Function &F3 = *I;
+  EXPECT_EQ(++I, funcs.end());
+
+  Triple Trip(M->getTargetTriple());
+  TargetLibraryInfoImpl TLII(Trip);
+  TargetLibraryInfo TLI(TLII);
+  llvm::CallGraph CG(*M);
+
+  auto AAR = GlobalsAAResult::analyzeModule(*M, TLI, CG);
+
+  EXPECT_EQ(FMRB_UnknownModRefBehavior, AAR.getModRefBehavior(&F1));
+  EXPECT_EQ(FMRB_DoesNotAccessMemory, AAR.getModRefBehavior(&F2));
+  EXPECT_EQ(FMRB_OnlyReadsMemory, AAR.getModRefBehavior(&F3));
+}
diff --git a/unittests/Analysis/LazyCallGraphTest.cpp b/unittests/Analysis/LazyCallGraphTest.cpp
index 6955beb37109..8c251cf043b8 100644
--- a/unittests/Analysis/LazyCallGraphTest.cpp
+++ b/unittests/Analysis/LazyCallGraphTest.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/AsmParser/Parser.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/unittests/Analysis/MemorySSA.cpp b/unittests/Analysis/MemorySSA.cpp
index 08b0e830a9b2..affa0e71820c 100644
--- a/unittests/Analysis/MemorySSA.cpp
+++ b/unittests/Analysis/MemorySSA.cpp
@@ -6,9 +6,9 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
@@ -244,6 +244,52 @@ TEST_F(MemorySSATest, CreateALoadUpdater) {
   MSSA.verifyMemorySSA();
 }
 
+TEST_F(MemorySSATest, SinkLoad) {
+  F = Function::Create(
+      FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()}, false),
+      GlobalValue::ExternalLinkage, "F", &M);
+  BasicBlock *Entry(BasicBlock::Create(C, "", F));
+  BasicBlock *Left(BasicBlock::Create(C, "", F));
+  BasicBlock *Right(BasicBlock::Create(C, "", F));
+  BasicBlock *Merge(BasicBlock::Create(C, "", F));
+  B.SetInsertPoint(Entry);
+  B.CreateCondBr(B.getTrue(), Left, Right);
+  B.SetInsertPoint(Left, Left->begin()); // NOTE(review): overridden below
+  Argument *PointerArg = &*F->arg_begin();
+  B.SetInsertPoint(Left);
+  B.CreateBr(Merge);
+  B.SetInsertPoint(Right);
+  B.CreateBr(Merge);
+
+  // Load in left block
+  B.SetInsertPoint(Left, Left->begin());
+  LoadInst *LoadInst1 = B.CreateLoad(PointerArg);
+  // Store in merge block
+  B.SetInsertPoint(Merge, Merge->begin());
+  B.CreateStore(B.getInt8(16), PointerArg);
+
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAUpdater Updater(&MSSA);
+
+  // Mimic sinking the load from Left into Merge, verifying MemorySSA en route:
+  // - clone the load
+  // - insert the clone at the start of Merge (the "exit" block)
+  // - register the clone with MemorySSA as a new use
+  // - remove the original load's access from MemorySSA
+
+  LoadInst *LoadInstClone = cast<LoadInst>(LoadInst1->clone());
+  Merge->getInstList().insert(Merge->begin(), LoadInstClone);
+  MemoryAccess * NewLoadAccess =
+      Updater.createMemoryAccessInBB(LoadInstClone, nullptr,
+                                     LoadInstClone->getParent(),
+                                     MemorySSA::Beginning);
+  Updater.insertUse(cast<MemoryUse>(NewLoadAccess));
+  MSSA.verifyMemorySSA();
+  Updater.removeMemoryAccess(MSSA.getMemoryAccess(LoadInst1));
+  MSSA.verifyMemorySSA();
+}
+
 TEST_F(MemorySSATest, MoveAStore) {
   // We create a diamond where there is a in the entry, a store on one side, and
   // a load at the end.  After building MemorySSA, we test updating by moving
diff --git a/unittests/Analysis/ProfileSummaryInfoTest.cpp b/unittests/Analysis/ProfileSummaryInfoTest.cpp
index c9e4fc029dc0..68a6d7bb4584 100644
--- a/unittests/Analysis/ProfileSummaryInfoTest.cpp
+++ b/unittests/Analysis/ProfileSummaryInfoTest.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index fcbbb46f7a7e..91f4c3535b74 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -10,7 +10,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
diff --git a/unittests/Analysis/TBAATest.cpp b/unittests/Analysis/TBAATest.cpp
index 3a1d2f43563e..f3f05d8fbd93 100644
--- a/unittests/Analysis/TBAATest.cpp
+++ b/unittests/Analysis/TBAATest.cpp
@@ -12,9 +12,9 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Support/CommandLine.h"
 #include "gtest/gtest.h"
diff --git a/unittests/Analysis/UnrollAnalyzer.cpp b/unittests/Analysis/UnrollAnalyzer.cpp
index d6a7bd360b93..937e69f0c257 100644
--- a/unittests/Analysis/UnrollAnalyzer.cpp
+++ b/unittests/Analysis/UnrollAnalyzer.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/LoopUnrollAnalyzer.h"
 #include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Analysis/LoopUnrollAnalyzer.h"
-#include "llvm/IR/Dominators.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/BinaryFormat/CMakeLists.txt b/unittests/BinaryFormat/CMakeLists.txt
new file mode 100644
index 000000000000..631936795b6c
--- /dev/null
+++ b/unittests/BinaryFormat/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  BinaryFormat
+  )
+
+add_llvm_unittest(BinaryFormatTests
+  DwarfTest.cpp
+  TestFileMagic.cpp
+  )
+
diff --git a/unittests/Support/DwarfTest.cpp b/unittests/BinaryFormat/DwarfTest.cpp
similarity index 97%
rename from unittests/Support/DwarfTest.cpp
rename to unittests/BinaryFormat/DwarfTest.cpp
index 148ea2736e15..f24e029beef2 100644
--- a/unittests/Support/DwarfTest.cpp
+++ b/unittests/BinaryFormat/DwarfTest.cpp
@@ -1,4 +1,4 @@
-//===- unittest/Support/DwarfTest.cpp - Dwarf support tests ---------------===//
+//===- unittest/BinaryFormat/DwarfTest.cpp - Dwarf support tests ----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,7 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Dwarf.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/ADT/StringRef.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/BinaryFormat/TestFileMagic.cpp b/unittests/BinaryFormat/TestFileMagic.cpp
new file mode 100644
index 000000000000..fc2c1eef9fbf
--- /dev/null
+++ b/unittests/BinaryFormat/TestFileMagic.cpp
@@ -0,0 +1,128 @@
+//===- llvm/unittest/BinaryFormat/TestFileMagic.cpp - File magic tests ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+namespace fs = llvm::sys::fs;
+// Fails the current test fatally, with number and message, if x is an error.
+#define ASSERT_NO_ERROR(x)                                                     \
+  if (std::error_code ASSERT_NO_ERROR_ec = x) {                                \
+    SmallString<128> MessageStorage;                                           \
+    raw_svector_ostream Message(MessageStorage);                               \
+    Message << #x ": did not return errc::success.\n"                          \
+            << "error number: " << ASSERT_NO_ERROR_ec.value() << "\n"          \
+            << "error message: " << ASSERT_NO_ERROR_ec.message() << "\n";      \
+    GTEST_FATAL_FAILURE_(MessageStorage.c_str());                              \
+  } else {                                                                     \
+  }
+
+class MagicTest : public testing::Test {
+protected:
+  /// Unique temporary directory in which all created filesystem entities must
+  /// be placed. It is removed at the end of each test (must be empty).
+  SmallString<128> TestDirectory;
+  /// Creates TestDirectory before each test and reports its path.
+  void SetUp() override {
+    ASSERT_NO_ERROR(
+        fs::createUniqueDirectory("file-system-test", TestDirectory));
+    // Print the directory path to stderr for reference.
+    errs() << "Test Directory: " << TestDirectory << '\n';
+    errs().flush();
+  }
+  /// Removes TestDirectory after each test; a failed removal fails the test.
+  void TearDown() override { ASSERT_NO_ERROR(fs::remove(TestDirectory.str())); }
+};
+
+const char archive[] = "!<arch>\x0A";
+const char bitcode[] = "\xde\xc0\x17\x0b";
+const char coff_object[] = "\x00\x00......";
+const char coff_bigobj[] =
+    "\x00\x00\xff\xff\x00\x02......"
+    "\xc7\xa1\xba\xd1\xee\xba\xa9\x4b\xaf\x20\xfa\xf6\x6a\xa4\xdc\xb8";
+const char coff_import_library[] = "\x00\x00\xff\xff....";
+const char elf_relocatable[] = {0x7f, 'E', 'L', 'F', 1, 2, 1, 0, 0,
+                                0,    0,   0,   0,   0, 0, 0, 0, 1};
+const char macho_universal_binary[] = "\xca\xfe\xba\xbe...\x00";
+const char macho_object[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x01............";
+const char macho_executable[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x02............";
+const char macho_fixed_virtual_memory_shared_lib[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x03............";
+const char macho_core[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x04............";
+const char macho_preload_executable[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x05............";
+const char macho_dynamically_linked_shared_lib[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x06............";
+const char macho_dynamic_linker[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x07............";
+const char macho_bundle[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x08............";
+const char macho_dsym_companion[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x0a............";
+const char macho_kext_bundle[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x0b............";
+const char windows_resource[] = "\x00\x00\x00\x00\x020\x00\x00\x00\xff";
+const char macho_dynamically_linked_shared_lib_stub[] =
+    "\xfe\xed\xfa\xce........\x00\x00\x00\x09............";
+
+TEST_F(MagicTest, Magic) {
+  struct type {
+    const char *filename;
+    const char *magic_str;
+    size_t magic_str_len;
+    file_magic magic;
+  } types[] = {
+#define DEFINE(magic) {#magic, magic, sizeof(magic), file_magic::magic}
+      DEFINE(archive),
+      DEFINE(bitcode),
+      DEFINE(coff_object),
+      {"coff_bigobj", coff_bigobj, sizeof(coff_bigobj),
+       file_magic::coff_object},
+      DEFINE(coff_import_library),
+      DEFINE(elf_relocatable),
+      DEFINE(macho_universal_binary),
+      DEFINE(macho_object),
+      DEFINE(macho_executable),
+      DEFINE(macho_fixed_virtual_memory_shared_lib),
+      DEFINE(macho_core),
+      DEFINE(macho_preload_executable),
+      DEFINE(macho_dynamically_linked_shared_lib),
+      DEFINE(macho_dynamic_linker),
+      DEFINE(macho_bundle),
+      DEFINE(macho_dynamically_linked_shared_lib_stub),
+      DEFINE(macho_dsym_companion),
+      DEFINE(macho_kext_bundle),
+      DEFINE(windows_resource)
+#undef DEFINE
+  };
+
+  // Write each sample to a file, classify the in-memory bytes, then clean up.
+  for (type *i = types, *e = types + (sizeof(types) / sizeof(type)); i != e;
+       ++i) {
+    SmallString<128> file_pathname(TestDirectory);
+    llvm::sys::path::append(file_pathname, i->filename);
+    std::error_code EC;
+    raw_fd_ostream file(file_pathname, EC, sys::fs::F_None);
+    ASSERT_FALSE(EC || file.has_error()); // EC is set when the open fails.
+    StringRef magic(i->magic_str, i->magic_str_len);
+    file << magic;
+    file.close();
+    EXPECT_EQ(i->magic, identify_magic(magic));
+    ASSERT_NO_ERROR(fs::remove(Twine(file_pathname)));
+  }
+}
diff --git a/unittests/Bitcode/BitstreamReaderTest.cpp b/unittests/Bitcode/BitstreamReaderTest.cpp
index 935ef4bcffc0..e7535f3e818e 100644
--- a/unittests/Bitcode/BitstreamReaderTest.cpp
+++ b/unittests/Bitcode/BitstreamReaderTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/Bitcode/BitstreamWriterTest.cpp b/unittests/Bitcode/BitstreamWriterTest.cpp
index f17cc157cde9..79143c8e7a63 100644
--- a/unittests/Bitcode/BitstreamWriterTest.cpp
+++ b/unittests/Bitcode/BitstreamWriterTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Bitcode/BitstreamWriter.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Bitcode/BitstreamWriter.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index 8e40f141463b..daece1fe3221 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -18,6 +18,7 @@ add_subdirectory(Linker)
 add_subdirectory(MC)
 add_subdirectory(MI)
 add_subdirectory(Object)
+add_subdirectory(BinaryFormat)
 add_subdirectory(ObjectYAML)
 add_subdirectory(Option)
 add_subdirectory(ProfileData)
diff --git a/unittests/CodeGen/DIEHashTest.cpp b/unittests/CodeGen/DIEHashTest.cpp
index dda08fcd6654..f60b0dd3b7ef 100644
--- a/unittests/CodeGen/DIEHashTest.cpp
+++ b/unittests/CodeGen/DIEHashTest.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/DIE.h"
 #include "../lib/CodeGen/AsmPrinter/DIEHash.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/DIE.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/CodeGen/MachineInstrBundleIteratorTest.cpp b/unittests/CodeGen/MachineInstrBundleIteratorTest.cpp
index 8f15fbf3941d..63365ab8fd4e 100644
--- a/unittests/CodeGen/MachineInstrBundleIteratorTest.cpp
+++ b/unittests/CodeGen/MachineInstrBundleIteratorTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/ilist_node.h"
 #include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/ADT/ilist_node.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index 3d14eb736df2..18cab52a81b0 100644
--- a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -21,7 +22,6 @@
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/ObjectYAML/DWARFEmitter.h"
 #include "llvm/ObjectYAML/DWARFYAML.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/TargetSelect.h"
diff --git a/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp b/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp
index 028a03595de6..da7f43e721aa 100644
--- a/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp
+++ b/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp
@@ -10,7 +10,7 @@
 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/LEB128.h"
 #include "gtest/gtest.h"
diff --git a/unittests/DebugInfo/DWARF/DwarfGenerator.cpp b/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
index ac63bbaf0a11..c32cfa1de9ae 100644
--- a/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
+++ b/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
@@ -7,9 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "../lib/CodeGen/AsmPrinter/DwarfStringPool.h"
 #include "DwarfGenerator.h"
+#include "../lib/CodeGen/AsmPrinter/DwarfStringPool.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -28,7 +29,6 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
 #include "llvm/PassAnalysisSupport.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/unittests/ExecutionEngine/ExecutionEngineTest.cpp b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
index 7cad84130692..ec5fab6c4893 100644
--- a/unittests/ExecutionEngine/ExecutionEngineTest.cpp
+++ b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ExecutionEngine/Interpreter.h"
 #include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/LLVMContext.h"
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
index cf63da3a22a5..155d10f5ccd3 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-c/Analysis.h"
 #include "MCJITTestAPICommon.h"
+#include "llvm-c/Analysis.h"
 #include "llvm-c/Core.h"
 #include "llvm-c/ExecutionEngine.h"
 #include "llvm-c/Target.h"
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITMultipleModuleTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITMultipleModuleTest.cpp
index 65f969f24c6c..1226bba1c506 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITMultipleModuleTest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITMultipleModuleTest.cpp
@@ -12,8 +12,8 @@
 // modules, accessing global variables, etc.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ExecutionEngine/MCJIT.h"
 #include "MCJITTestBase.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
index 744bfdb4a01b..bc5cd689a19f 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/Support/DynamicLibrary.h"
 #include "MCJITTestBase.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp
index ab04c14b0957..13693381c006 100644
--- a/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp
+++ b/unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "OrcTestCommon.h"
 #include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
+#include "OrcTestCommon.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp b/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
index ab43c4af13f1..ed425449784c 100644
--- a/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
+++ b/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
 #include "OrcTestCommon.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp b/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
index 3362f490a38a..213c460aa676 100644
--- a/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
+++ b/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "gtest/gtest.h"
 
 namespace {
diff --git a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
index 362c143c54ef..68db454637c5 100644
--- a/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
+++ b/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
 #include "llvm/ExecutionEngine/Orc/NullResolver.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
 #include "llvm/Object/ObjectFile.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp b/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp
index 305325b6c6ef..e8ba16a472b7 100644
--- a/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp
+++ b/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp
@@ -8,11 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "OrcTestCommon.h"
-#include "gtest/gtest.h"
 #include "llvm-c/Core.h"
 #include "llvm-c/OrcBindings.h"
 #include "llvm-c/Target.h"
 #include "llvm-c/TargetMachine.h"
+#include "gtest/gtest.h"
 
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/unittests/ExecutionEngine/Orc/OrcTestCommon.h
index dff72c6b9d57..24320034a17a 100644
--- a/unittests/ExecutionEngine/Orc/OrcTestCommon.h
+++ b/unittests/ExecutionEngine/Orc/OrcTestCommon.h
@@ -15,14 +15,14 @@
 #ifndef LLVM_UNITTESTS_EXECUTIONENGINE_ORC_ORCTESTCOMMON_H
 #define LLVM_UNITTESTS_EXECUTIONENGINE_ORC_ORCTESTCOMMON_H
 
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/TypeBuilder.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/Support/TargetSelect.h"
 #include <memory>
 
diff --git a/unittests/ExecutionEngine/Orc/QueueChannel.h b/unittests/ExecutionEngine/Orc/QueueChannel.h
index 3d1058a83ebc..d8c16811af11 100644
--- a/unittests/ExecutionEngine/Orc/QueueChannel.h
+++ b/unittests/ExecutionEngine/Orc/QueueChannel.h
@@ -13,8 +13,8 @@
 #include "llvm/ExecutionEngine/Orc/RawByteChannel.h"
 #include "llvm/Support/Error.h"
 
-#include <queue>
 #include <condition_variable>
+#include <queue>
 
 namespace llvm {
 
diff --git a/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp b/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp
index c13a75a5cbfe..8878451bdec2 100644
--- a/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp
+++ b/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 #include "OrcTestCommon.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
 #include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
 #include "llvm/ExecutionEngine/Orc/NullResolver.h"
-#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
 #include "gtest/gtest.h"
diff --git a/unittests/IR/AsmWriterTest.cpp b/unittests/IR/AsmWriterTest.cpp
index c7e7bb5c9f0f..55c2a70e21f5 100644
--- a/unittests/IR/AsmWriterTest.cpp
+++ b/unittests/IR/AsmWriterTest.cpp
@@ -6,8 +6,8 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
diff --git a/unittests/IR/ConstantRangeTest.cpp b/unittests/IR/ConstantRangeTest.cpp
index c6c9bf6d6b50..0292f60fe332 100644
--- a/unittests/IR/ConstantRangeTest.cpp
+++ b/unittests/IR/ConstantRangeTest.cpp
@@ -187,6 +187,23 @@ TEST_F(ConstantRangeTest, Trunc) {
   EXPECT_EQ(TOne, ConstantRange(One.getLower().trunc(10),
                                 One.getUpper().trunc(10)));
   EXPECT_TRUE(TSome.isFullSet());
+  EXPECT_TRUE(TWrap.isFullSet());
+
+  // trunc([2, 5), 3->2) = [2, 1)
+  ConstantRange TwoFive(APInt(3, 2), APInt(3, 5));
+  EXPECT_EQ(TwoFive.truncate(2), ConstantRange(APInt(2, 2), APInt(2, 1)));
+
+  // trunc([2, 6), 3->2) = full
+  ConstantRange TwoSix(APInt(3, 2), APInt(3, 6));
+  EXPECT_TRUE(TwoSix.truncate(2).isFullSet());
+
+  // trunc([5, 7), 3->2) = [1, 3)
+  ConstantRange FiveSeven(APInt(3, 5), APInt(3, 7));
+  EXPECT_EQ(FiveSeven.truncate(2), ConstantRange(APInt(2, 1), APInt(2, 3)));
+
+  // trunc([7, 1), 3->2) = [3, 1)
+  ConstantRange SevenOne(APInt(3, 7), APInt(3, 1));
+  EXPECT_EQ(SevenOne.truncate(2), ConstantRange(APInt(2, 3), APInt(2, 1)));
 }
 
 TEST_F(ConstantRangeTest, ZExt) {
diff --git a/unittests/IR/ConstantsTest.cpp b/unittests/IR/ConstantsTest.cpp
index 7a8a3045a0d4..ccffa50bf133 100644
--- a/unittests/IR/ConstantsTest.cpp
+++ b/unittests/IR/ConstantsTest.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Constants.h"
+#include "llvm-c/Core.h"
+#include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm-c/Core.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
diff --git a/unittests/IR/DebugTypeODRUniquingTest.cpp b/unittests/IR/DebugTypeODRUniquingTest.cpp
index 7cf1cd22a2fb..7eb08e24b408 100644
--- a/unittests/IR/DebugTypeODRUniquingTest.cpp
+++ b/unittests/IR/DebugTypeODRUniquingTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 
diff --git a/unittests/IR/DominatorTreeTest.cpp b/unittests/IR/DominatorTreeTest.cpp
index d2062839a734..232f0cbd4ed9 100644
--- a/unittests/IR/DominatorTreeTest.cpp
+++ b/unittests/IR/DominatorTreeTest.cpp
@@ -257,3 +257,55 @@ TEST(DominatorTree, Unreachable) {
         DT->verifyDomTree();
       });
 }
+
+TEST(DominatorTree, NonUniqueEdges) {
+  StringRef ModuleString =
+      "define i32 @f(i32 %i, i32 *%p) {\n"
+      "bb0:\n"
+      "   store i32 %i, i32 *%p\n"
+      "   switch i32 %i, label %bb2 [\n"
+      "     i32 0, label %bb1\n"
+      "     i32 1, label %bb1\n"
+      "   ]\n"
+      " bb1:\n"
+      "   ret i32 1\n"
+      " bb2:\n"
+      "   ret i32 4\n"
+      "}\n";
+
+  // Parse the module; the switch gives bb0 two distinct edges to bb1.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  runWithDomTree(
+      *M, "f",
+      [&](Function &F, DominatorTree *DT, DominatorTreeBase<BasicBlock> *PDT) {
+        Function::iterator FI = F.begin();
+
+        BasicBlock *BB0 = &*FI++;
+        BasicBlock *BB1 = &*FI++;
+        BasicBlock *BB2 = &*FI++;
+        // Preconditions use gtest: plain assert() vanishes in NDEBUG builds.
+        const TerminatorInst *TI = BB0->getTerminator();
+        ASSERT_EQ(3u, TI->getNumSuccessors()) << "Switch has three successors";
+
+        BasicBlockEdge Edge_BB0_BB2(BB0, TI->getSuccessor(0));
+        ASSERT_EQ(BB2, Edge_BB0_BB2.getEnd())
+            << "Default label is the 1st successor";
+
+        BasicBlockEdge Edge_BB0_BB1_a(BB0, TI->getSuccessor(1));
+        ASSERT_EQ(BB1, Edge_BB0_BB1_a.getEnd()) << "BB1 is the 2nd successor";
+
+        BasicBlockEdge Edge_BB0_BB1_b(BB0, TI->getSuccessor(2));
+        ASSERT_EQ(BB1, Edge_BB0_BB1_b.getEnd()) << "BB1 is the 3rd successor";
+        // Only the single edge to bb2 dominates its target; bb1 has two edges.
+        EXPECT_TRUE(DT->dominates(Edge_BB0_BB2, BB2));
+        EXPECT_FALSE(DT->dominates(Edge_BB0_BB2, BB1));
+
+        EXPECT_FALSE(DT->dominates(Edge_BB0_BB1_a, BB1));
+        EXPECT_FALSE(DT->dominates(Edge_BB0_BB1_b, BB1));
+
+        EXPECT_FALSE(DT->dominates(Edge_BB0_BB1_a, BB2));
+        EXPECT_FALSE(DT->dominates(Edge_BB0_BB1_b, BB2));
+      });
+}
diff --git a/unittests/IR/IRBuilderTest.cpp b/unittests/IR/IRBuilderTest.cpp
index 5686c3b2b3a7..186330f10573 100644
--- a/unittests/IR/IRBuilderTest.cpp
+++ b/unittests/IR/IRBuilderTest.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp
index 9f8fc4eaeb6f..bcf3babf7f68 100644
--- a/unittests/IR/MetadataTest.cpp
+++ b/unittests/IR/MetadataTest.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Metadata.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
@@ -14,7 +15,6 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Type.h"
diff --git a/unittests/IR/ModuleTest.cpp b/unittests/IR/ModuleTest.cpp
index 9f52fedc4559..d93d036bb115 100644
--- a/unittests/IR/ModuleTest.cpp
+++ b/unittests/IR/ModuleTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/Support/RandomNumberGenerator.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/IR/PassManagerTest.cpp b/unittests/IR/PassManagerTest.cpp
index b3a039a364fc..ad06cc4778fe 100644
--- a/unittests/IR/PassManagerTest.cpp
+++ b/unittests/IR/PassManagerTest.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/PassManager.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/IR/PatternMatch.cpp b/unittests/IR/PatternMatch.cpp
index 2d1321def7e3..02ef87f5b13e 100644
--- a/unittests/IR/PatternMatch.cpp
+++ b/unittests/IR/PatternMatch.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
@@ -21,7 +22,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/NoFolder.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/IR/UserTest.cpp b/unittests/IR/UserTest.cpp
index 7ba6840313b1..794dfc133bba 100644
--- a/unittests/IR/UserTest.cpp
+++ b/unittests/IR/UserTest.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/User.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/User.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
 using namespace llvm;
diff --git a/unittests/IR/ValueTest.cpp b/unittests/IR/ValueTest.cpp
index 142444a809c6..0087cb2fa82c 100644
--- a/unittests/IR/ValueTest.cpp
+++ b/unittests/IR/ValueTest.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Value.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSlotTracker.h"
-#include "llvm/IR/Value.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
 using namespace llvm;
diff --git a/unittests/IR/VerifierTest.cpp b/unittests/IR/VerifierTest.cpp
index 188509aadf77..f1f453ed5d10 100644
--- a/unittests/IR/VerifierTest.cpp
+++ b/unittests/IR/VerifierTest.cpp
@@ -7,18 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/Verifier.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/Verifier.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
diff --git a/unittests/Linker/LinkModulesTest.cpp b/unittests/Linker/LinkModulesTest.cpp
index f31409c50121..67c12b838344 100644
--- a/unittests/Linker/LinkModulesTest.cpp
+++ b/unittests/Linker/LinkModulesTest.cpp
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm-c/Core.h"
+#include "llvm-c/Linker.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/BasicBlock.h"
@@ -16,8 +18,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Linker/Linker.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm-c/Core.h"
-#include "llvm-c/Linker.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/MC/DwarfLineTables.cpp b/unittests/MC/DwarfLineTables.cpp
index d66c832df0ba..1b1a4d647ce0 100644
--- a/unittests/MC/DwarfLineTables.cpp
+++ b/unittests/MC/DwarfLineTables.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Dwarf.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
diff --git a/unittests/MC/StringTableBuilderTest.cpp b/unittests/MC/StringTableBuilderTest.cpp
index f68350d90ba5..b547f934b02c 100644
--- a/unittests/MC/StringTableBuilderTest.cpp
+++ b/unittests/MC/StringTableBuilderTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallString.h"
 #include "llvm/MC/StringTableBuilder.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Endian.h"
 #include "gtest/gtest.h"
 #include <string>
diff --git a/unittests/MI/LiveIntervalTest.cpp b/unittests/MI/LiveIntervalTest.cpp
index 026fb42d345f..7118a43e6d88 100644
--- a/unittests/MI/LiveIntervalTest.cpp
+++ b/unittests/MI/LiveIntervalTest.cpp
@@ -1,4 +1,3 @@
-#include "gtest/gtest.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MIRParser/MIRParser.h"
@@ -6,6 +5,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -13,7 +13,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
@@ -59,18 +59,15 @@ std::unique_ptr<Module> parseMIR(LLVMContext &Context,
   if (!MIR)
     return nullptr;
 
-  std::unique_ptr<Module> M = MIR->parseLLVMModule();
+  std::unique_ptr<Module> M = MIR->parseIRModule();
   if (!M)
     return nullptr;
 
   M->setDataLayout(TM.createDataLayout());
 
-  Function *F = M->getFunction(FuncName);
-  if (!F)
-    return nullptr;
-
   MachineModuleInfo *MMI = new MachineModuleInfo(&TM);
-  MMI->setMachineFunctionInitializer(MIR.get());
+  if (MIR->parseMachineFunctions(*M, *MMI))
+    return nullptr;
   PM.add(MMI);
 
   return M;
@@ -154,6 +151,8 @@ body: |
   std::unique_ptr<MIRParser> MIR;
   std::unique_ptr<Module> M = parseMIR(Context, PM, MIR, *TM, MIRString,
                                        "func");
+  if (!M)
+    report_fatal_error("Could not parse MIR code\n");
 
   PM.add(new TestPass(T));
 
diff --git a/unittests/ProfileData/CoverageMappingTest.cpp b/unittests/ProfileData/CoverageMappingTest.cpp
index 0783a23a67b0..0e6e993cf3da 100644
--- a/unittests/ProfileData/CoverageMappingTest.cpp
+++ b/unittests/ProfileData/CoverageMappingTest.cpp
@@ -584,6 +584,6 @@ INSTANTIATE_TEST_CASE_P(ParameterizedCovMapTest, CoverageMappingTest,
                         ::testing::Values(std::pair<bool, bool>({false, false}),
                                           std::pair<bool, bool>({false, true}),
                                           std::pair<bool, bool>({true, false}),
-                                          std::pair<bool, bool>({true, true})));
+                                          std::pair<bool, bool>({true, true})),);
 
 } // end anonymous namespace
diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp
index 1b44463cd650..b15029a08137 100644
--- a/unittests/ProfileData/InstrProfTest.cpp
+++ b/unittests/ProfileData/InstrProfTest.cpp
@@ -1024,6 +1024,6 @@ TEST_F(SparseInstrProfTest, preserve_no_records) {
 }
 
 INSTANTIATE_TEST_CASE_P(MaybeSparse, MaybeSparseInstrProfTest,
-                        ::testing::Bool());
+                        ::testing::Bool(),);
 
 } // end anonymous namespace
diff --git a/unittests/ProfileData/SampleProfTest.cpp b/unittests/ProfileData/SampleProfTest.cpp
index 96b2a01c7bd7..68b46cc5086d 100644
--- a/unittests/ProfileData/SampleProfTest.cpp
+++ b/unittests/ProfileData/SampleProfTest.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/ProfileData/ProfileCommon.h"
-#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/ProfileData/SampleProfReader.h"
 #include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/Support/Casting.h"
diff --git a/unittests/Support/ARMAttributeParser.cpp b/unittests/Support/ARMAttributeParser.cpp
index c2df6537ff63..1df03db6d07f 100644
--- a/unittests/Support/ARMAttributeParser.cpp
+++ b/unittests/Support/ARMAttributeParser.cpp
@@ -1,5 +1,5 @@
-#include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/ARMAttributeParser.h"
+#include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/LEB128.h"
 #include "gtest/gtest.h"
 #include <string>
diff --git a/unittests/Support/BinaryStreamTest.cpp b/unittests/Support/BinaryStreamTest.cpp
index 1ce74cbb722b..795c18902a9b 100644
--- a/unittests/Support/BinaryStreamTest.cpp
+++ b/unittests/Support/BinaryStreamTest.cpp
@@ -416,9 +416,7 @@ TEST_F(BinaryStreamTest, VarStreamArray) {
 
   struct StringExtractor {
   public:
-    typedef uint32_t &ContextType;
-    static Error extract(BinaryStreamRef Stream, uint32_t &Len, StringRef &Item,
-                         uint32_t &Index) {
+    Error operator()(BinaryStreamRef Stream, uint32_t &Len, StringRef &Item) {
       if (Index == 0)
         Len = strlen("1. Test");
       else if (Index == 1)
@@ -435,11 +433,12 @@ TEST_F(BinaryStreamTest, VarStreamArray) {
       ++Index;
       return Error::success();
     }
+
+    uint32_t Index = 0;
   };
 
   for (auto &Stream : Streams) {
-    uint32_t Context = 0;
-    VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input, Context);
+    VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input);
     auto Iter = Array.begin();
     ASSERT_EQ("1. Test", *Iter++);
     ASSERT_EQ("2. Longer Test", *Iter++);
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index e7f2f515d76a..098dba83197b 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -19,7 +19,6 @@ add_llvm_unittest(SupportTests
   ConvertUTFTest.cpp
   DataExtractorTest.cpp
   DebugTest.cpp
-  DwarfTest.cpp
   EndianStreamTest.cpp
   EndianTest.cpp
   ErrorOrTest.cpp
@@ -67,6 +66,11 @@ add_llvm_unittest(SupportTests
   xxhashTest.cpp
   )
 
+# Disable all warnings for AlignOfTest.cpp,
+# as it does things intentionally, and there is no reliable way of
+# disabling all warnings for all the compilers by using pragmas.
+set_source_files_properties(AlignOfTest.cpp PROPERTIES COMPILE_FLAGS -w)
+
 # ManagedStatic.cpp uses <pthread>.
 target_link_libraries(SupportTests ${LLVM_PTHREAD_LIB})
 
diff --git a/unittests/Support/CommandLineTest.cpp b/unittests/Support/CommandLineTest.cpp
index 33573c4e6960..7fdd42b4feca 100644
--- a/unittests/Support/CommandLineTest.cpp
+++ b/unittests/Support/CommandLineTest.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Config/config.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/StringSaver.h"
diff --git a/unittests/Support/CompressionTest.cpp b/unittests/Support/CompressionTest.cpp
index 18a6175460d3..505714bd2da5 100644
--- a/unittests/Support/CompressionTest.cpp
+++ b/unittests/Support/CompressionTest.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Compression.h"
-#include "llvm/Support/Error.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/Error.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/CrashRecoveryTest.cpp b/unittests/Support/CrashRecoveryTest.cpp
index e9ffd1f8871f..3f13693632db 100644
--- a/unittests/Support/CrashRecoveryTest.cpp
+++ b/unittests/Support/CrashRecoveryTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/CrashRecoveryContext.h"
 #include "gtest/gtest.h"
 
 #ifdef LLVM_ON_WIN32
diff --git a/unittests/Support/DataExtractorTest.cpp b/unittests/Support/DataExtractorTest.cpp
index 81de983d2265..8b645524ec6c 100644
--- a/unittests/Support/DataExtractorTest.cpp
+++ b/unittests/Support/DataExtractorTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/Support/DataExtractor.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/Support/DynamicLibrary/CMakeLists.txt b/unittests/Support/DynamicLibrary/CMakeLists.txt
index f0e945e78b18..2fa4bf237d40 100644
--- a/unittests/Support/DynamicLibrary/CMakeLists.txt
+++ b/unittests/Support/DynamicLibrary/CMakeLists.txt
@@ -4,16 +4,21 @@ add_llvm_unittest(DynamicLibraryTests DynamicLibraryTest.cpp)
 
 export_executable_symbols(DynamicLibraryTests)
 
-add_library(PipSqueak SHARED PipSqueak.cxx)
+function(dynlib_add_module NAME)
+  add_library(${NAME} SHARED PipSqueak.cxx)
 
-set_output_directory(PipSqueak
-  BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}
-  LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}
-  )
+  set_output_directory(${NAME}
+    BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}
+    LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}
+    )
 
-set_target_properties(PipSqueak
-  PROPERTIES PREFIX ""
-  SUFFIX ".so"
-  )
+  set_target_properties(${NAME}
+    PROPERTIES PREFIX ""
+    SUFFIX ".so"
+    )
 
-add_dependencies(DynamicLibraryTests PipSqueak)
+  add_dependencies(DynamicLibraryTests ${NAME})
+endfunction(dynlib_add_module)
+
+dynlib_add_module(PipSqueak)
+dynlib_add_module(SecondLib)
diff --git a/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp b/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
index 0674a91282a1..80a20990de18 100644
--- a/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
+++ b/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
@@ -7,34 +7,34 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"
 #include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Path.h"
 #include "gtest/gtest.h"
 
 #include "PipSqueak.h"
-#include <string>
 
 using namespace llvm;
 using namespace llvm::sys;
 
 extern "C" PIPSQUEAK_EXPORT const char *TestA() { return "ProcessCall"; }
 
-std::string LibPath() {
+std::string LibPath(const std::string Name = "PipSqueak") {
   const std::vector<testing::internal::string>& Argvs = testing::internal::GetArgvs();
   const char *Argv0 = Argvs.size() > 0 ? Argvs[0].c_str() : "DynamicLibraryTests";
   void *Ptr = (void*)(intptr_t)TestA;
   std::string Path = fs::getMainExecutable(Argv0, Ptr);
   llvm::SmallString<256> Buf(path::parent_path(Path));
-  path::append(Buf, "PipSqueak.so");
+  path::append(Buf, (Name+".so").c_str());
   return Buf.str();
 }
 
 #if defined(_WIN32) || (defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN))
 
 typedef void (*SetStrings)(std::string &GStr, std::string &LStr);
+typedef void (*TestOrder)(std::vector<std::string> &V);
 typedef const char *(*GetString)();
 
 template <class T> static T FuncPtr(void *Ptr) {
@@ -100,26 +100,59 @@ TEST(DynamicLibrary, Overload) {
 }
 
 TEST(DynamicLibrary, Shutdown) {
-  std::string A, B;
+  std::string A("PipSqueak"), B, C("SecondLib");
+  std::vector<std::string> Order;
   {
     std::string Err;
     llvm_shutdown_obj Shutdown;
     DynamicLibrary DL =
-        DynamicLibrary::getPermanentLibrary(LibPath().c_str(), &Err);
+        DynamicLibrary::getPermanentLibrary(LibPath(A).c_str(), &Err);
     EXPECT_TRUE(DL.isValid());
     EXPECT_TRUE(Err.empty());
 
-    SetStrings SS = FuncPtr<SetStrings>(
+    SetStrings SS_0 = FuncPtr<SetStrings>(
         DynamicLibrary::SearchForAddressOfSymbol("SetStrings"));
-    EXPECT_TRUE(SS != nullptr);
+    EXPECT_TRUE(SS_0 != nullptr);
 
-    SS(A, B);
-    EXPECT_EQ(B, "Local::Local");
+    SS_0(A, B);
+    EXPECT_EQ(B, "Local::Local(PipSqueak)");
+
+    TestOrder TO_0 = FuncPtr<TestOrder>(
+        DynamicLibrary::SearchForAddressOfSymbol("TestOrder"));
+    EXPECT_TRUE(TO_0 != nullptr);
+    
+    DynamicLibrary DL2 =
+        DynamicLibrary::getPermanentLibrary(LibPath(C).c_str(), &Err);
+    EXPECT_TRUE(DL2.isValid());
+    EXPECT_TRUE(Err.empty());
+
+    // Should find latest version of symbols in SecondLib
+    SetStrings SS_1 = FuncPtr<SetStrings>(
+        DynamicLibrary::SearchForAddressOfSymbol("SetStrings"));
+    EXPECT_TRUE(SS_1 != nullptr);
+    EXPECT_TRUE(SS_0 != SS_1);
+
+    TestOrder TO_1 = FuncPtr<TestOrder>(
+        DynamicLibrary::SearchForAddressOfSymbol("TestOrder"));
+    EXPECT_TRUE(TO_1 != nullptr);
+    EXPECT_TRUE(TO_0 != TO_1);
+
+    B.clear();
+    SS_1(C, B);
+    EXPECT_EQ(B, "Local::Local(SecondLib)");
+
+    TO_0(Order);
+    TO_1(Order);
   }
   EXPECT_EQ(A, "Global::~Global");
   EXPECT_EQ(B, "Local::~Local");
   EXPECT_TRUE(FuncPtr<SetStrings>(DynamicLibrary::SearchForAddressOfSymbol(
                   "SetStrings")) == nullptr);
+
+  // Test unload/destruction ordering
+  EXPECT_EQ(Order.size(), 2UL);
+  EXPECT_EQ(Order.front(), "SecondLib");
+  EXPECT_EQ(Order.back(), "PipSqueak");
 }
 
 #else
diff --git a/unittests/Support/DynamicLibrary/PipSqueak.cxx b/unittests/Support/DynamicLibrary/PipSqueak.cxx
index d1cf7c042b72..79cf59255a4f 100644
--- a/unittests/Support/DynamicLibrary/PipSqueak.cxx
+++ b/unittests/Support/DynamicLibrary/PipSqueak.cxx
@@ -9,38 +9,40 @@
 
 #include "PipSqueak.h"
 
-#if defined(_WIN32) && !defined(__GNUC__)
-// Disable warnings from inclusion of xlocale & exception
-#pragma warning(push)
-#pragma warning(disable: 4530)
-#pragma warning(disable: 4577)
-#include <string>
-#pragma warning(pop)
-#else
-#include <string>
-#endif
-
 struct Global {
   std::string *Str;
-  Global() : Str(nullptr) {}
+  std::vector<std::string> *Vec;
+  Global() : Str(nullptr), Vec(nullptr) {}
   ~Global() {
-    if (Str)
+    if (Str) {
+      if (Vec)
+        Vec->push_back(*Str);
       *Str = "Global::~Global";
+    }
   }
 };
 
-struct Local {
-  std::string &Str;
-  Local(std::string &S) : Str(S) { Str = "Local::Local"; }
-  ~Local() { Str = "Local::~Local"; }
-};
-
 static Global Glb;
 
+struct Local {
+  std::string &Str;
+  Local(std::string &S) : Str(S) {
+    Str = "Local::Local";
+    if (Glb.Str && !Glb.Str->empty())
+      Str += std::string("(") + *Glb.Str + std::string(")");
+  }
+  ~Local() { Str = "Local::~Local"; }
+};
+
+
 extern "C" PIPSQUEAK_EXPORT void SetStrings(std::string &GStr,
                                             std::string &LStr) {
-  static Local Lcl(LStr);
   Glb.Str = &GStr;
+  static Local Lcl(LStr);
+}
+
+extern "C" PIPSQUEAK_EXPORT void TestOrder(std::vector<std::string> &V) {
+  Glb.Vec = &V;
 }
 
 extern "C" PIPSQUEAK_EXPORT const char *TestA() { return "LibCall"; }
diff --git a/unittests/Support/DynamicLibrary/PipSqueak.h b/unittests/Support/DynamicLibrary/PipSqueak.h
index e6a859d60716..3e4f79a9a6f4 100644
--- a/unittests/Support/DynamicLibrary/PipSqueak.h
+++ b/unittests/Support/DynamicLibrary/PipSqueak.h
@@ -10,6 +10,19 @@
 #ifndef LLVM_PIPSQUEAK_H
 #define LLVM_PIPSQUEAK_H
 
+#if defined(_WIN32) && !defined(__GNUC__)
+// Disable warnings from inclusion of xlocale & exception
+#pragma warning(push)
+#pragma warning(disable: 4530)
+#pragma warning(disable: 4577)
+#include <string>
+#include <vector>
+#pragma warning(pop)
+#else
+#include <string>
+#include <vector>
+#endif
+
 #ifdef _WIN32
 #define PIPSQUEAK_EXPORT __declspec(dllexport)
 #else
diff --git a/unittests/Support/EndianStreamTest.cpp b/unittests/Support/EndianStreamTest.cpp
index ea6c308c560a..48c5c3bc8175 100644
--- a/unittests/Support/EndianStreamTest.cpp
+++ b/unittests/Support/EndianStreamTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/EndianStream.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/DataTypes.h"
 #include "gtest/gtest.h"
 using namespace llvm;
diff --git a/unittests/Support/FileOutputBufferTest.cpp b/unittests/Support/FileOutputBufferTest.cpp
index 53a2ae0aadde..5f20634d66c2 100644
--- a/unittests/Support/FileOutputBufferTest.cpp
+++ b/unittests/Support/FileOutputBufferTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/unittests/Support/FormatVariadicTest.cpp b/unittests/Support/FormatVariadicTest.cpp
index b0c843870afc..99b90b17ae44 100644
--- a/unittests/Support/FormatVariadicTest.cpp
+++ b/unittests/Support/FormatVariadicTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/FormatAdapters.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/FormatAdapters.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/LEB128Test.cpp b/unittests/Support/LEB128Test.cpp
index 061936df1d19..09db6dfdc593 100644
--- a/unittests/Support/LEB128Test.cpp
+++ b/unittests/Support/LEB128Test.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
-#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
 #include <string>
 using namespace llvm;
 
diff --git a/unittests/Support/MD5Test.cpp b/unittests/Support/MD5Test.cpp
index fa9372fde33f..8b151827a7bd 100644
--- a/unittests/Support/MD5Test.cpp
+++ b/unittests/Support/MD5Test.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/MD5.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Support/MD5.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/MathExtrasTest.cpp b/unittests/Support/MathExtrasTest.cpp
index f46d94e9e577..e26653b8a656 100644
--- a/unittests/Support/MathExtrasTest.cpp
+++ b/unittests/Support/MathExtrasTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/Support/MathExtras.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/Support/MemoryBufferTest.cpp b/unittests/Support/MemoryBufferTest.cpp
index 0efa22c157d9..294581aeb928 100644
--- a/unittests/Support/MemoryBufferTest.cpp
+++ b/unittests/Support/MemoryBufferTest.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/Support/MemoryTest.cpp b/unittests/Support/MemoryTest.cpp
index f439cb2af9b1..140219ffd1d6 100644
--- a/unittests/Support/MemoryTest.cpp
+++ b/unittests/Support/MemoryTest.cpp
@@ -360,6 +360,6 @@ unsigned MemoryFlags[] = {
 
 INSTANTIATE_TEST_CASE_P(AllocationTests,
 			MappedMemoryTest,
-			::testing::ValuesIn(MemoryFlags));
+			::testing::ValuesIn(MemoryFlags),);
 
 }  // anonymous namespace
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index a4bdcb5c79a2..3e474f33ca6d 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -8,9 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Path.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -919,86 +920,6 @@ TEST_F(FileSystemTest, Remove) {
   ASSERT_FALSE(fs::exists(BaseDir));
 }
 
-const char archive[] = "!<arch>\x0A";
-const char bitcode[] = "\xde\xc0\x17\x0b";
-const char coff_object[] = "\x00\x00......";
-const char coff_bigobj[] = "\x00\x00\xff\xff\x00\x02......"
-    "\xc7\xa1\xba\xd1\xee\xba\xa9\x4b\xaf\x20\xfa\xf6\x6a\xa4\xdc\xb8";
-const char coff_import_library[] = "\x00\x00\xff\xff....";
-const char elf_relocatable[] = { 0x7f, 'E', 'L', 'F', 1, 2, 1, 0, 0,
-                                 0,    0,   0,   0,   0, 0, 0, 0, 1 };
-const char macho_universal_binary[] = "\xca\xfe\xba\xbe...\x00";
-const char macho_object[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x01............";
-const char macho_executable[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x02............";
-const char macho_fixed_virtual_memory_shared_lib[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x03............";
-const char macho_core[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x04............";
-const char macho_preload_executable[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x05............";
-const char macho_dynamically_linked_shared_lib[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x06............";
-const char macho_dynamic_linker[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x07............";
-const char macho_bundle[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x08............";
-const char macho_dsym_companion[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x0a............";
-const char macho_kext_bundle[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x0b............";
-const char windows_resource[] = "\x00\x00\x00\x00\x020\x00\x00\x00\xff";
-const char macho_dynamically_linked_shared_lib_stub[] =
-    "\xfe\xed\xfa\xce........\x00\x00\x00\x09............";
-
-TEST_F(FileSystemTest, Magic) {
-  struct type {
-    const char *filename;
-    const char *magic_str;
-    size_t magic_str_len;
-    fs::file_magic magic;
-  } types[] = {
-#define DEFINE(magic)                                           \
-    { #magic, magic, sizeof(magic), fs::file_magic::magic }
-    DEFINE(archive),
-    DEFINE(bitcode),
-    DEFINE(coff_object),
-    { "coff_bigobj", coff_bigobj, sizeof(coff_bigobj), fs::file_magic::coff_object },
-    DEFINE(coff_import_library),
-    DEFINE(elf_relocatable),
-    DEFINE(macho_universal_binary),
-    DEFINE(macho_object),
-    DEFINE(macho_executable),
-    DEFINE(macho_fixed_virtual_memory_shared_lib),
-    DEFINE(macho_core),
-    DEFINE(macho_preload_executable),
-    DEFINE(macho_dynamically_linked_shared_lib),
-    DEFINE(macho_dynamic_linker),
-    DEFINE(macho_bundle),
-    DEFINE(macho_dynamically_linked_shared_lib_stub),
-    DEFINE(macho_dsym_companion),
-    DEFINE(macho_kext_bundle),
-    DEFINE(windows_resource)
-#undef DEFINE
-    };
-
-  // Create some files filled with magic.
-  for (type *i = types, *e = types + (sizeof(types) / sizeof(type)); i != e;
-                                                                     ++i) {
-    SmallString<128> file_pathname(TestDirectory);
-    path::append(file_pathname, i->filename);
-    std::error_code EC;
-    raw_fd_ostream file(file_pathname, EC, sys::fs::F_None);
-    ASSERT_FALSE(file.has_error());
-    StringRef magic(i->magic_str, i->magic_str_len);
-    file << magic;
-    file.close();
-    EXPECT_EQ(i->magic, fs::identify_magic(magic));
-    ASSERT_NO_ERROR(fs::remove(Twine(file_pathname)));
-  }
-}
-
 #ifdef LLVM_ON_WIN32
 TEST_F(FileSystemTest, CarriageReturn) {
   SmallString<128> FilePathname(TestDirectory);
diff --git a/unittests/Support/ProgramTest.cpp b/unittests/Support/ProgramTest.cpp
index 3926ceb92b3e..f658980073da 100644
--- a/unittests/Support/ProgramTest.cpp
+++ b/unittests/Support/ProgramTest.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/Program.h"
 #include "gtest/gtest.h"
 #include <stdlib.h>
 #if defined(__APPLE__)
diff --git a/unittests/Support/SpecialCaseListTest.cpp b/unittests/Support/SpecialCaseListTest.cpp
index e86eecb527bb..130848845e45 100644
--- a/unittests/Support/SpecialCaseListTest.cpp
+++ b/unittests/Support/SpecialCaseListTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/SpecialCaseList.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SpecialCaseList.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/SwapByteOrderTest.cpp b/unittests/Support/SwapByteOrderTest.cpp
index 4f2537c4d5de..474eac6bbb38 100644
--- a/unittests/Support/SwapByteOrderTest.cpp
+++ b/unittests/Support/SwapByteOrderTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/Support/SwapByteOrder.h"
+#include "gtest/gtest.h"
 #include <cstdlib>
 #include <ctime>
 using namespace llvm;
diff --git a/unittests/Support/TarWriterTest.cpp b/unittests/Support/TarWriterTest.cpp
index 84005de56c12..927c8ed9be14 100644
--- a/unittests/Support/TarWriterTest.cpp
+++ b/unittests/Support/TarWriterTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/TarWriter.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/TarWriter.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/TargetParserTest.cpp b/unittests/Support/TargetParserTest.cpp
index 76d1917d537a..b252641f1a13 100644
--- a/unittests/Support/TargetParserTest.cpp
+++ b/unittests/Support/TargetParserTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/TargetParser.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/Support/TargetParser.h"
 #include "gtest/gtest.h"
 #include <string>
 
diff --git a/unittests/Support/TrigramIndexTest.cpp b/unittests/Support/TrigramIndexTest.cpp
index fb0ad1749bbd..df42c1efd8a0 100644
--- a/unittests/Support/TrigramIndexTest.cpp
+++ b/unittests/Support/TrigramIndexTest.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/TrigramIndex.h"
+#include "llvm/ADT/STLExtras.h"
 #include "gtest/gtest.h"
 
 #include <string>
diff --git a/unittests/Support/YAMLIOTest.cpp b/unittests/Support/YAMLIOTest.cpp
index 5a0280c8ca5b..21c8430f9588 100644
--- a/unittests/Support/YAMLIOTest.cpp
+++ b/unittests/Support/YAMLIOTest.cpp
@@ -14,7 +14,6 @@
 #include "llvm/Support/YAMLTraits.h"
 #include "gtest/gtest.h"
 
-
 using llvm::yaml::Input;
 using llvm::yaml::Output;
 using llvm::yaml::IO;
diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp
index 3f12a53fd9c5..d411a286830b 100644
--- a/unittests/Support/YAMLParserTest.cpp
+++ b/unittests/Support/YAMLParserTest.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/YAMLParser.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/YAMLParser.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
diff --git a/unittests/Support/formatted_raw_ostream_test.cpp b/unittests/Support/formatted_raw_ostream_test.cpp
index 9bb804691337..2b8f06523beb 100644
--- a/unittests/Support/formatted_raw_ostream_test.cpp
+++ b/unittests/Support/formatted_raw_ostream_test.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/Support/raw_ostream_test.cpp b/unittests/Support/raw_ostream_test.cpp
index 777e555949ee..a7a5ce8dd6d4 100644
--- a/unittests/Support/raw_ostream_test.cpp
+++ b/unittests/Support/raw_ostream_test.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/Support/raw_pwrite_stream_test.cpp b/unittests/Support/raw_pwrite_stream_test.cpp
index 08b2f90d6054..249780a8c829 100644
--- a/unittests/Support/raw_pwrite_stream_test.cpp
+++ b/unittests/Support/raw_pwrite_stream_test.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/Support/raw_sha1_ostream_test.cpp b/unittests/Support/raw_sha1_ostream_test.cpp
index 1bb4e2eb1d58..e176f3f7a19b 100644
--- a/unittests/Support/raw_sha1_ostream_test.cpp
+++ b/unittests/Support/raw_sha1_ostream_test.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_sha1_ostream.h"
+#include "gtest/gtest.h"
 
 #include <string>
 
diff --git a/unittests/Target/AArch64/InstSizes.cpp b/unittests/Target/AArch64/InstSizes.cpp
index 22b47c6852ab..c1fe7f22dc5a 100644
--- a/unittests/Target/AArch64/InstSizes.cpp
+++ b/unittests/Target/AArch64/InstSizes.cpp
@@ -21,7 +21,8 @@ std::unique_ptr<TargetMachine> createTargetMachine() {
 
   std::string Error;
   const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
-  assert(TheTarget && "Target not registered");
+  if (!TheTarget)
+    report_fatal_error("Target not registered");
 
   return std::unique_ptr<TargetMachine>(
       TheTarget->createTargetMachine(TT, CPU, FS, TargetOptions(), None,
@@ -58,20 +59,25 @@ void runChecks(
   std::unique_ptr<MemoryBuffer> MBuffer = MemoryBuffer::getMemBuffer(MIRString);
   std::unique_ptr<MIRParser> MParser =
       createMIRParser(std::move(MBuffer), Context);
-  assert(MParser && "Couldn't create MIR parser");
+  if (!MParser)
+    report_fatal_error("Couldn't create MIR parser");
 
-  std::unique_ptr<Module> M = MParser->parseLLVMModule();
-  assert(M && "Couldn't parse module");
+  std::unique_ptr<Module> M = MParser->parseIRModule();
+  if (!M)
+    report_fatal_error("Couldn't parse module");
 
   M->setTargetTriple(TM->getTargetTriple().getTriple());
   M->setDataLayout(TM->createDataLayout());
 
-  auto F = M->getFunction("sizes");
-  assert(F && "Couldn't find intended function");
-
   MachineModuleInfo MMI(TM);
-  MMI.setMachineFunctionInitializer(MParser.get());
-  auto &MF = MMI.getMachineFunction(*F);
+  bool Res = MParser->parseMachineFunctions(*M, MMI);
+  if (Res)
+    report_fatal_error("Couldn't parse MIR functions");
+
+  auto F = M->getFunction("sizes");
+  if (!F)
+    report_fatal_error("Couldn't find intended function");
+  auto &MF = MMI.getOrCreateMachineFunction(*F);
 
   Checks(*II, MF);
 }
diff --git a/unittests/Transforms/Scalar/LoopPassManagerTest.cpp b/unittests/Transforms/Scalar/LoopPassManagerTest.cpp
index 227060f0a46e..dfc351c648b1 100644
--- a/unittests/Transforms/Scalar/LoopPassManagerTest.cpp
+++ b/unittests/Transforms/Scalar/LoopPassManagerTest.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -19,7 +20,6 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/Transforms/Utils/CMakeLists.txt b/unittests/Transforms/Utils/CMakeLists.txt
index 0fc19ef09fb0..475c365dddc4 100644
--- a/unittests/Transforms/Utils/CMakeLists.txt
+++ b/unittests/Transforms/Utils/CMakeLists.txt
@@ -11,5 +11,6 @@ add_llvm_unittest(UtilsTests
   FunctionComparator.cpp
   IntegerDivision.cpp
   Local.cpp
+  OrderedInstructions.cpp
   ValueMapperTest.cpp
   )
diff --git a/unittests/Transforms/Utils/FunctionComparator.cpp b/unittests/Transforms/Utils/FunctionComparator.cpp
index ff68cd6224d7..26e20cd9112c 100644
--- a/unittests/Transforms/Utils/FunctionComparator.cpp
+++ b/unittests/Transforms/Utils/FunctionComparator.cpp
@@ -10,8 +10,8 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Transforms/Utils/OrderedInstructions.cpp b/unittests/Transforms/Utils/OrderedInstructions.cpp
new file mode 100644
index 000000000000..1d6df1722945
--- /dev/null
+++ b/unittests/Transforms/Utils/OrderedInstructions.cpp
@@ -0,0 +1,65 @@
+//===- OrderedInstructions.cpp - Unit tests for OrderedInstructions  ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/OrderedInstructions.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+/// Check intra-basicblock and inter-basicblock dominance using
+/// OrderedInstructions.
+TEST(OrderedInstructionsTest, DominanceTest) {
+  LLVMContext Ctx;
+  Module M("test", Ctx);
+  IRBuilder<> B(Ctx);
+  FunctionType *FTy =
+      FunctionType::get(Type::getVoidTy(Ctx), {B.getInt8PtrTy()}, false);
+  Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
+
+  // Create the function as follows and check the dominance relations.
+  //
+  // f():
+  //  bbx:
+  //    loadx;
+  //    loady;
+  //  bby:
+  //    loadz;
+  //    return;
+  //
+  // More specifically, check that loadx -> (dominates) loady, that loady
+  // does not dominate loadx, and that loady -> loadz.
+  //
+  // Create BBX with 2 loads.
+  BasicBlock *BBX = BasicBlock::Create(Ctx, "bbx", F);
+  B.SetInsertPoint(BBX);
+  Argument *PointerArg = &*F->arg_begin();
+  LoadInst *LoadInstX = B.CreateLoad(PointerArg);
+  LoadInst *LoadInstY = B.CreateLoad(PointerArg);
+
+  // Create BBY with 1 load.
+  BasicBlock *BBY = BasicBlock::Create(Ctx, "bby", F);
+  B.SetInsertPoint(BBY);
+  LoadInst *LoadInstZ = B.CreateLoad(PointerArg);
+  B.CreateRet(LoadInstZ);
+  std::unique_ptr<DominatorTree> DT(new DominatorTree(*F));
+  OrderedInstructions OI(&*DT);
+
+  // Intra-BB dominance test.
+  EXPECT_TRUE(OI.dominates(LoadInstX, LoadInstY));
+  EXPECT_FALSE(OI.dominates(LoadInstY, LoadInstX));
+
+  // Inter-BB dominance test.
+  EXPECT_TRUE(OI.dominates(LoadInstY, LoadInstZ));
+}
diff --git a/unittests/Transforms/Utils/ValueMapperTest.cpp b/unittests/Transforms/Utils/ValueMapperTest.cpp
index 34b62bb930d9..94ac76bb076c 100644
--- a/unittests/Transforms/Utils/ValueMapperTest.cpp
+++ b/unittests/Transforms/Utils/ValueMapperTest.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index f563cadc92c3..9d808cc875c0 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -1203,7 +1203,7 @@ size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
       // CHECK-DAG, verify that there's no 'not' strings occurred in that
       // region.
-      StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
+      StringRef SkippedRegion = Buffer.slice(LastPos, MatchPos);
       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
         return StringRef::npos;
       // Clear "not strings".
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 9933c8319d30..b2913afae12a 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -35,7 +35,6 @@ add_tablegen(llvm-tblgen LLVM
   TableGen.cpp
   Types.cpp
   X86DisassemblerTables.cpp
-  X86FoldTablesEmitter.cpp
   X86EVEX2VEXTablesEmitter.cpp
   X86ModRMFilters.cpp
   X86RecognizableInstr.cpp
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 329ce348727e..00d20f1df6c2 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -46,7 +46,6 @@ enum ActionType {
   GenAttributes,
   GenSearchableTables,
   GenGlobalISel,
-  GenX86FoldTables,
   GenX86EVEX2VEXTables,
   GenRegisterBank,
 };
@@ -98,8 +97,6 @@ namespace {
                                "Generate generic binary-searchable table"),
                     clEnumValN(GenGlobalISel, "gen-global-isel",
                                "Generate GlobalISel selector"),
-                    clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
-                               "Generate X86 fold tables"),
                     clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables",
                                "Generate X86 EVEX to VEX compress tables"),
                     clEnumValN(GenRegisterBank, "gen-register-bank",
@@ -193,9 +190,6 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenGlobalISel:
     EmitGlobalISel(Records, OS);
     break;
-  case GenX86FoldTables:
-    EmitX86FoldTables(Records, OS);
-    break;
   case GenRegisterBank:
     EmitRegisterBank(Records, OS);
     break;
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index 53614df27c40..2512997e27f9 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -81,7 +81,6 @@ void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
 void EmitAttributes(RecordKeeper &RK, raw_ostream &OS);
 void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
-void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
 void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
 
diff --git a/utils/TableGen/X86FoldTablesEmitter.cpp b/utils/TableGen/X86FoldTablesEmitter.cpp
deleted file mode 100644
index 34f5fbc6ea31..000000000000
--- a/utils/TableGen/X86FoldTablesEmitter.cpp
+++ /dev/null
@@ -1,732 +0,0 @@
-//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This tablegen backend is responsible for emitting the memory fold tables of
-// the X86 backend instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CodeGenDAGPatterns.h"
-#include "CodeGenTarget.h"
-#include "X86RecognizableInstr.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/TableGenBackend.h"
-
-using namespace llvm;
-
-namespace {
-
-// 3 possible strategies for the unfolding flag (TB_NO_REVERSE) of the
-// manual added entries.
-enum UnfoldStrategy {
-  UNFOLD,     // Allow unfolding
-  NO_UNFOLD,  // Prevent unfolding
-  NO_STRATEGY // Make decision according to operands' sizes
-};
-
-// Represents an entry in the manual mapped instructions set.
-struct ManualMapEntry {
-  const char *RegInstStr;
-  const char *MemInstStr;
-  UnfoldStrategy Strategy;
-
-  ManualMapEntry(const char *RegInstStr, const char *MemInstStr,
-                 UnfoldStrategy Strategy = NO_STRATEGY)
-      : RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {}
-};
-
-class IsMatch;
-
-// List of instructions requiring explicitly aligned memory.
-const char *const ExplicitAlign[] = {"MOVDQA",  "MOVAPS",  "MOVAPD",  "MOVNTPS",
-                                     "MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
-
-// List of instructions NOT requiring explicit memory alignment.
-const char *const ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD"};
-
-// For manually mapping instructions that do not match by their encoding.
-const ManualMapEntry ManualMapSet[] = {
-    { "ADD16ri_DB",       "ADD16mi",         NO_UNFOLD  },
-    { "ADD16ri8_DB",      "ADD16mi8",        NO_UNFOLD  },
-    { "ADD16rr_DB",       "ADD16mr",         NO_UNFOLD  },
-    { "ADD32ri_DB",       "ADD32mi",         NO_UNFOLD  },
-    { "ADD32ri8_DB",      "ADD32mi8",        NO_UNFOLD  },
-    { "ADD32rr_DB",       "ADD32mr",         NO_UNFOLD  },
-    { "ADD64ri32_DB",     "ADD64mi32",       NO_UNFOLD  },
-    { "ADD64ri8_DB",      "ADD64mi8",        NO_UNFOLD  },
-    { "ADD64rr_DB",       "ADD64mr",         NO_UNFOLD  },
-    { "ADD16rr_DB",       "ADD16rm",         NO_UNFOLD  },
-    { "ADD32rr_DB",       "ADD32rm",         NO_UNFOLD  },
-    { "ADD64rr_DB",       "ADD64rm",         NO_UNFOLD  },
-    { "PUSH16r",          "PUSH16rmm",       NO_UNFOLD  },
-    { "PUSH32r",          "PUSH32rmm",       NO_UNFOLD  },
-    { "PUSH64r",          "PUSH64rmm",       NO_UNFOLD  },
-    { "TAILJMPr",         "TAILJMPm",        UNFOLD },
-    { "TAILJMPr64",       "TAILJMPm64",      UNFOLD },
-    { "TAILJMPr64_REX",   "TAILJMPm64_REX",  UNFOLD },
-};
-
-// Do not add these instructions to any of the folding tables.
-const char *const NoFoldSet[] = {
-    "TCRETURNri64",
-    "TCRETURNmi64", // Special dealing (in X86InstrCompiler.td under
-    "TCRETURNri",   // "tailcall stuff" section).
-    "TCRETURNmi",
-
-    // Never fold XCHG, the register and memory forms have different locking
-    // semantics.
-    "XCHG8rr",  "XCHG8rm",
-    "XCHG16rr", "XCHG16rm",
-    "XCHG32rr", "XCHG32rm",
-    "XCHG64rr", "XCHG64rm",
-
-    // Different calculations of the folded operand between
-    // memory and register forms (folding is illegal).
-    // - In their register form, the second register operand's relevant
-    //   bits are only the first 4/5/6 (depending on mode and reg size).
-    // - In their memory form, the second register operand's relevant
-    //   bits are only the first 16/32/64 (depending on mode and reg size).
-    "BT16rr",  "BT32rr",  "BT64rr",
-    "BT16mr",  "BT32mr",  "BT64mr",
-    "BTC16rr", "BTC32rr", "BTC64rr",
-    "BTC16mr", "BTC32mr", "BTC64mr",
-    "BTR16rr", "BTR32rr", "BTR64rr",
-    "BTR16mr", "BTR32mr", "BTR64mr",
-    "BTS16rr", "BTS32rr", "BTS64rr",
-    "BTS16mr", "BTS32mr", "BTS64mr",
-
-    // insertps cannot be folded without adjusting the immediate. There's custom
-    // code to handle it in X86InstrInfo.cpp, ignore it here.
-    "INSERTPSrr", "INSERTPSrm",
-    "VINSERTPSrr", "VINSERTPSrm", "VINSERTPSZrr", "VINSERTPSZrm",
-
-    // Memory folding is enabled only when optimizing for size by DAG
-    // patterns only. (issue detailed in D28744 review)
-    "VCVTSS2SDrm",            "VCVTSS2SDrr",
-    "VCVTSS2SDZrm",           "VCVTSS2SDZrr",
-    "VCVTSS2SDZrmk",          "VCVTSS2SDZrrk",
-    "VCVTSS2SDZrmkz",         "VCVTSS2SDZrrkz",
-    "VCVTSS2SDZrm_Int",       "VCVTSS2SDZrr_Int",
-    "VCVTSS2SDZrm_Intk",      "VCVTSS2SDZrr_Intk",
-    "VCVTSS2SDZrm_Intkz",     "VCVTSS2SDZrr_Intkz",
-    "VCVTSD2SSrm",            "VCVTSD2SSrr",
-    "VCVTSD2SSZrm",           "VCVTSD2SSZrr",
-    "VCVTSD2SSZrmk",          "VCVTSD2SSZrrk",
-    "VCVTSD2SSZrmkz",         "VCVTSD2SSZrrkz",
-    "VCVTSD2SSZrm_Int",       "VCVTSD2SSZrr_Int",
-    "VCVTSD2SSZrm_Intk",      "VCVTSD2SSZrr_Intk",
-    "VCVTSD2SSZrm_Intkz",     "VCVTSD2SSZrr_Intkz",
-    "VRCP14SSrm",             "VRCP14SSrr",
-    "VRCP14SDrm",             "VRCP14SDrr",
-    "VRSQRT14SSrm",           "VRSQRT14SSrr",
-    "VRSQRT14SDrm",           "VRSQRT14SDrr",
-    "VSQRTSSm",               "VSQRTSSr",
-    "VSQRTSSm_Int",           "VSQRTSSr_Int",
-    "VSQRTSSZm",              "VSQRTSSZr",
-    "VSQRTSSZm_Int",          "VSQRTSSZr_Int",
-    "VSQRTSSZm_Intk",         "VSQRTSSZr_Intk",
-    "VSQRTSSZm_Intkz",        "VSQRTSSZr_Intkz",
-    "VSQRTSDm",               "VSQRTSDr",
-    "VSQRTSDm_Int",           "VSQRTSDr_Int",
-    "VSQRTSDZm",              "VSQRTSDZr",
-    "VSQRTSDZm_Int",          "VSQRTSDZr_Int",
-    "VSQRTSDZm_Intk",         "VSQRTSDZr_Intk",
-    "VSQRTSDZm_Intkz",        "VSQRTSDZr_Intkz",
-};
-
-static bool isExplicitAlign(const CodeGenInstruction *Inst) {
-  return any_of(ExplicitAlign, [Inst](const char *InstStr) {
-    return Inst->TheDef->getName().find(InstStr) != StringRef::npos;
-  });
-}
-
-static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
-  return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
-    return Inst->TheDef->getName().find(InstStr) != StringRef::npos;
-  });
-}
-
-class X86FoldTablesEmitter {
-  RecordKeeper &Records;
-  CodeGenTarget Target;
-
-  // Represents an entry in the folding table
-  class X86FoldTableEntry {
-    const CodeGenInstruction *RegInst;
-    const CodeGenInstruction *MemInst;
-
-  public:
-    bool CannotUnfold = false;
-    bool IsLoad = false;
-    bool IsStore = false;
-    bool IsAligned = false;
-    unsigned int Alignment = 0;
-
-    X86FoldTableEntry(const CodeGenInstruction *RegInst,
-                      const CodeGenInstruction *MemInst)
-        : RegInst(RegInst), MemInst(MemInst) {}
-
-    friend raw_ostream &operator<<(raw_ostream &OS,
-                                   const X86FoldTableEntry &E) {
-      OS << "{ X86::" << E.RegInst->TheDef->getName()
-         << ", X86::" << E.MemInst->TheDef->getName() << ", ";
-
-      if (E.IsLoad)
-        OS << "TB_FOLDED_LOAD | ";
-      if (E.IsStore)
-        OS << "TB_FOLDED_STORE | ";
-      if (E.CannotUnfold)
-        OS << "TB_NO_REVERSE | ";
-      if (E.IsAligned)
-        OS << "TB_ALIGN_" << E.Alignment << " | ";
-
-      OS << "0 },\n";
-
-      return OS;
-    }
-  };
-
-  typedef std::vector<X86FoldTableEntry> FoldTable;
-  // std::vector for each folding table.
-  // Table2Addr - Holds instructions which their memory form performs load+store
-  // Table#i - Holds instructions which the their memory form perform a load OR
-  //           a store,  and their #i'th operand is folded.
-  FoldTable Table2Addr;
-  FoldTable Table0;
-  FoldTable Table1;
-  FoldTable Table2;
-  FoldTable Table3;
-  FoldTable Table4;
-
-public:
-  X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
-
-  // run - Generate the 6 X86 memory fold tables.
-  void run(raw_ostream &OS);
-
-private:
-  // Decides to which table to add the entry with the given instructions.
-  // S sets the strategy of adding the TB_NO_REVERSE flag.
-  void updateTables(const CodeGenInstruction *RegInstr,
-                    const CodeGenInstruction *MemInstr,
-                    const UnfoldStrategy S = NO_STRATEGY);
-
-  // Generates X86FoldTableEntry with the given instructions and fill it with
-  // the appropriate flags - then adds it to Table.
-  void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInstr,
-                         const CodeGenInstruction *MemInstr,
-                         const UnfoldStrategy S, const unsigned int FoldedInd);
-
-  // Print the given table as a static const C++ array of type
-  // X86MemoryFoldTableEntry.
-  void printTable(const FoldTable &Table, std::string TableName,
-                  raw_ostream &OS) {
-    OS << "\nstatic const X86MemoryFoldTableEntry MemoryFold" << TableName
-       << "[] = {\n";
-
-    for (const X86FoldTableEntry &E : Table)
-      OS.indent(2) << E;
-
-    OS << "};\n";
-  }
-};
-
-// Return true if one of the instruction's operands is a RST register class
-static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
-  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
-    return OpIn.Rec->getName() == "RST";
-  });
-}
-
-// Return true if one of the instruction's operands is a ptr_rc_tailcall
-static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
-  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
-    return OpIn.Rec->getName() == "ptr_rc_tailcall";
-  });
-}
-
-// Calculates the integer value representing the BitsInit object
-static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
-  assert(B->getNumBits() <= sizeof(uint64_t) * CHAR_BIT &&
-         "BitInits' too long!");
-
-  uint64_t Value = 0;
-  for (unsigned i = 0, e = B->getNumBits(); i != e; ++i) {
-    BitInit *Bit = cast<BitInit>(B->getBit(i));
-    Value |= uint64_t(Bit->getValue()) << i;
-  }
-  return Value;
-}
-
-// Returns true if the two given BitsInits represent the same integer value
-static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) {
-  if (B1->getNumBits() != B2->getNumBits())
-    PrintFatalError("Comparing two BitsInits with different sizes!");
-
-  for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) {
-    BitInit *Bit1 = cast<BitInit>(B1->getBit(i));
-    BitInit *Bit2 = cast<BitInit>(B2->getBit(i));
-    if (Bit1->getValue() != Bit2->getValue())
-      return false;
-  }
-  return true;
-}
-
-// Return the size of the register operand
-static inline unsigned int getRegOperandSize(const Record *RegRec) {
-  if (RegRec->isSubClassOf("RegisterOperand"))
-    RegRec = RegRec->getValueAsDef("RegClass");
-  if (RegRec->isSubClassOf("RegisterClass"))
-    return RegRec->getValueAsListOfDefs("RegTypes")[0]->getValueAsInt("Size");
-
-  llvm_unreachable("Register operand's size not known!");
-}
-
-// Return the size of the memory operand
-static inline unsigned int
-getMemOperandSize(const Record *MemRec, const bool IntrinsicSensitive = false) {
-  if (MemRec->isSubClassOf("Operand")) {
-    // Intrinsic memory instructions use ssmem/sdmem.
-    if (IntrinsicSensitive &&
-        (MemRec->getName() == "sdmem" || MemRec->getName() == "ssmem"))
-      return 128;
-
-    StringRef Name =
-        MemRec->getValueAsDef("ParserMatchClass")->getValueAsString("Name");
-    if (Name == "Mem8")
-      return 8;
-    if (Name == "Mem16")
-      return 16;
-    if (Name == "Mem32")
-      return 32;
-    if (Name == "Mem64")
-      return 64;
-    if (Name == "Mem80")
-      return 80;
-    if (Name == "Mem128")
-      return 128;
-    if (Name == "Mem256")
-      return 256;
-    if (Name == "Mem512")
-      return 512;
-  }
-
-  llvm_unreachable("Memory operand's size not known!");
-}
-
-// Returns true if the record's list of defs includes the given def.
-static inline bool hasDefInList(const Record *Rec, const StringRef List,
-                                const StringRef Def) {
-  if (!Rec->isValueUnset(List)) {
-    return any_of(*(Rec->getValueAsListInit(List)),
-                  [Def](const Init *I) { return I->getAsString() == Def; });
-  }
-  return false;
-}
-
-// Return true if the instruction defined as a register flavor.
-static inline bool hasRegisterFormat(const Record *Inst) {
-  const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits");
-  uint64_t FormBitsNum = getValueFromBitsInit(FormBits);
-
-  // Values from X86Local namespace defined in X86RecognizableInstr.cpp
-  return FormBitsNum >= X86Local::MRMDestReg && FormBitsNum <= X86Local::MRM7r;
-}
-
-// Return true if the instruction defined as a memory flavor.
-static inline bool hasMemoryFormat(const Record *Inst) {
-  const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits");
-  uint64_t FormBitsNum = getValueFromBitsInit(FormBits);
-
-  // Values from X86Local namespace defined in X86RecognizableInstr.cpp
-  return FormBitsNum >= X86Local::MRMDestMem && FormBitsNum <= X86Local::MRM7m;
-}
-
-static inline bool isNOREXRegClass(const Record *Op) {
-  return Op->getName().find("_NOREX") != StringRef::npos;
-}
-
-static inline bool isRegisterOperand(const Record *Rec) {
-  return Rec->isSubClassOf("RegisterClass") ||
-         Rec->isSubClassOf("RegisterOperand") ||
-         Rec->isSubClassOf("PointerLikeRegClass");
-}
-
-static inline bool isMemoryOperand(const Record *Rec) {
-  return Rec->isSubClassOf("Operand") &&
-         Rec->getValueAsString("OperandType") == "OPERAND_MEMORY";
-}
-
-static inline bool isImmediateOperand(const Record *Rec) {
-  return Rec->isSubClassOf("Operand") &&
-         Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE";
-}
-
-// Get the alternative instruction pointed by "FoldGenRegForm" field.
-static inline const CodeGenInstruction *
-getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records,
-              const CodeGenTarget &Target) {
-
-  StringRef AltRegInstStr = I->TheDef->getValueAsString("FoldGenRegForm");
-  Record *AltRegInstRec = Records.getDef(AltRegInstStr);
-  assert(AltRegInstRec &&
-         "Alternative register form instruction def not found");
-  CodeGenInstruction &AltRegInst = Target.getInstruction(AltRegInstRec);
-  return &AltRegInst;
-}
-
-// Function object - Operator() returns true if the given VEX instruction
-// matches the EVEX instruction of this object.
-class IsMatch {
-  const CodeGenInstruction *MemInst;
-  const RecordKeeper &Records;
-
-public:
-  IsMatch(const CodeGenInstruction *Inst, const RecordKeeper &Records)
-      : MemInst(Inst), Records(Records) {}
-
-  bool operator()(const CodeGenInstruction *RegInst) {
-    Record *MemRec = MemInst->TheDef;
-    Record *RegRec = RegInst->TheDef;
-
-    // Return false if one (at least) of the encoding fields of both
-    // instructions do not match.
-    if (RegRec->getValueAsDef("OpEnc") != MemRec->getValueAsDef("OpEnc") ||
-        !equalBitsInits(RegRec->getValueAsBitsInit("Opcode"),
-                        MemRec->getValueAsBitsInit("Opcode")) ||
-        // VEX/EVEX fields
-        RegRec->getValueAsDef("OpPrefix") !=
-            MemRec->getValueAsDef("OpPrefix") ||
-        RegRec->getValueAsDef("OpMap") != MemRec->getValueAsDef("OpMap") ||
-        RegRec->getValueAsDef("OpSize") != MemRec->getValueAsDef("OpSize") ||
-        RegRec->getValueAsBit("hasVEX_4V") !=
-            MemRec->getValueAsBit("hasVEX_4V") ||
-        RegRec->getValueAsBit("hasEVEX_K") !=
-            MemRec->getValueAsBit("hasEVEX_K") ||
-        RegRec->getValueAsBit("hasEVEX_Z") !=
-            MemRec->getValueAsBit("hasEVEX_Z") ||
-        RegRec->getValueAsBit("hasEVEX_B") !=
-            MemRec->getValueAsBit("hasEVEX_B") ||
-        RegRec->getValueAsBit("hasEVEX_RC") !=
-            MemRec->getValueAsBit("hasEVEX_RC") ||
-        RegRec->getValueAsBit("hasREX_WPrefix") !=
-            MemRec->getValueAsBit("hasREX_WPrefix") ||
-        RegRec->getValueAsBit("hasLockPrefix") !=
-            MemRec->getValueAsBit("hasLockPrefix") ||
-        !equalBitsInits(RegRec->getValueAsBitsInit("EVEX_LL"),
-                        MemRec->getValueAsBitsInit("EVEX_LL")) ||
-        !equalBitsInits(RegRec->getValueAsBitsInit("VEX_WPrefix"),
-                        MemRec->getValueAsBitsInit("VEX_WPrefix")) ||
-        // Instruction's format - The register form's "Form" field should be
-        // the opposite of the memory form's "Form" field.
-        !areOppositeForms(RegRec->getValueAsBitsInit("FormBits"),
-                          MemRec->getValueAsBitsInit("FormBits")) ||
-        RegRec->getValueAsBit("isAsmParserOnly") !=
-            MemRec->getValueAsBit("isAsmParserOnly"))
-      return false;
-
-    // Make sure the sizes of the operands of both instructions suit each other.
-    // This is needed for instructions with intrinsic version (_Int).
-    // Where the only difference is the size of the operands.
-    // For example: VUCOMISDZrm and Int_VUCOMISDrm
-    // Also for instructions that their EVEX version was upgraded to work with
-    // k-registers. For example VPCMPEQBrm (xmm output register) and
-    // VPCMPEQBZ128rm (k register output register).
-    bool ArgFolded = false;
-    unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
-    unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
-    unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
-    unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
-
-    // Instructions with one output in their memory form use the memory folded
-    // operand as source and destination (Read-Modify-Write).
-    unsigned RegStartIdx =
-        (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;
-
-    for (unsigned i = 0, e = MemInst->Operands.size(); i < e; i++) {
-      Record *MemOpRec = MemInst->Operands[i].Rec;
-      Record *RegOpRec = RegInst->Operands[i + RegStartIdx].Rec;
-
-      if (MemOpRec == RegOpRec)
-        continue;
-
-      if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec)) {
-        if (getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec) ||
-            isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))
-          return false;
-      } else if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec)) {
-        if (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec))
-          return false;
-      } else if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec)) {
-        if (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type"))
-          return false;
-      } else {
-        // Only one operand can be folded.
-        if (ArgFolded)
-          return false;
-
-        assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
-        ArgFolded = true;
-      }
-    }
-
-    return true;
-  }
-
-private:
-  // Return true of the 2 given forms are the opposite of each other.
-  bool areOppositeForms(const BitsInit *RegFormBits,
-                        const BitsInit *MemFormBits) {
-    uint64_t MemFormNum = getValueFromBitsInit(MemFormBits);
-    uint64_t RegFormNum = getValueFromBitsInit(RegFormBits);
-
-    if ((MemFormNum == X86Local::MRM0m && RegFormNum == X86Local::MRM0r) ||
-        (MemFormNum == X86Local::MRM1m && RegFormNum == X86Local::MRM1r) ||
-        (MemFormNum == X86Local::MRM2m && RegFormNum == X86Local::MRM2r) ||
-        (MemFormNum == X86Local::MRM3m && RegFormNum == X86Local::MRM3r) ||
-        (MemFormNum == X86Local::MRM4m && RegFormNum == X86Local::MRM4r) ||
-        (MemFormNum == X86Local::MRM5m && RegFormNum == X86Local::MRM5r) ||
-        (MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) ||
-        (MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) ||
-        (MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) ||
-        (MemFormNum == X86Local::MRMDestMem &&
-         RegFormNum == X86Local::MRMDestReg) ||
-        (MemFormNum == X86Local::MRMSrcMem &&
-         RegFormNum == X86Local::MRMSrcReg) ||
-        (MemFormNum == X86Local::MRMSrcMem4VOp3 &&
-         RegFormNum == X86Local::MRMSrcReg4VOp3) ||
-        (MemFormNum == X86Local::MRMSrcMemOp4 &&
-         RegFormNum == X86Local::MRMSrcRegOp4))
-      return true;
-
-    return false;
-  }
-};
-
-} // end anonymous namespace
-
-void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
-                                             const CodeGenInstruction *RegInstr,
-                                             const CodeGenInstruction *MemInstr,
-                                             const UnfoldStrategy S,
-                                             const unsigned int FoldedInd) {
-
-  X86FoldTableEntry Result = X86FoldTableEntry(RegInstr, MemInstr);
-  Record *RegRec = RegInstr->TheDef;
-  Record *MemRec = MemInstr->TheDef;
-
-  // Only table0 entries should explicitly specify a load or store flag.
-  if (&Table == &Table0) {
-    unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs();
-    unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs();
-    // If the instruction writes to the folded operand, it will appear as an
-    // output in the register form instruction and as an input in the memory
-    // form instruction.
-    // If the instruction reads from the folded operand, it well appear as in
-    // input in both forms.
-    if (MemInOpsNum == RegInOpsNum)
-      Result.IsLoad = true;
-    else
-      Result.IsStore = true;
-  }
-
-  Record *RegOpRec = RegInstr->Operands[FoldedInd].Rec;
-  Record *MemOpRec = MemInstr->Operands[FoldedInd].Rec;
-
-  // Unfolding code generates a load/store instruction according to the size of
-  // the register in the register form instruction.
-  // If the register's size is greater than the memory's operand size, do not
-  // allow unfolding.
-  if (S == UNFOLD)
-    Result.CannotUnfold = false;
-  else if (S == NO_UNFOLD)
-    Result.CannotUnfold = true;
-  else if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
-    Result.CannotUnfold = true; // S == NO_STRATEGY
-
-  uint64_t Enc = getValueFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
-  if (isExplicitAlign(RegInstr)) {
-    // The instruction require explicitly aligned memory.
-    BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");
-    uint64_t Value = getValueFromBitsInit(VectSize);
-    Result.IsAligned = true;
-    Result.Alignment = Value;
-  } else if (Enc != X86Local::XOP && Enc != X86Local::VEX &&
-             Enc != X86Local::EVEX) {
-    // Instructions with VEX encoding do not require alignment.
-    if (!isExplicitUnalign(RegInstr) && getMemOperandSize(MemOpRec) > 64) {
-      // SSE packed vector instructions require a 16 byte alignment.
-      Result.IsAligned = true;
-      Result.Alignment = 16;
-    }
-  }
-
-  Table.push_back(Result);
-}
-
-void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
-                                        const CodeGenInstruction *MemInstr,
-                                        const UnfoldStrategy S) {
-
-  Record *RegRec = RegInstr->TheDef;
-  Record *MemRec = MemInstr->TheDef;
-  unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
-  unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
-  unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
-  unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
-
-  // Instructions which have the WriteRMW value (Read-Modify-Write) should be
-  // added to Table2Addr.
-  if (hasDefInList(MemRec, "SchedRW", "WriteRMW") && MemOutSize != RegOutSize &&
-      MemInSize == RegInSize) {
-    addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0);
-    return;
-  }
-
-  if (MemInSize == RegInSize && MemOutSize == RegOutSize) {
-    // Load-Folding cases.
-    // If the i'th register form operand is a register and the i'th memory form
-    // operand is a memory operand, add instructions to Table#i.
-    for (unsigned i = RegOutSize, e = RegInstr->Operands.size(); i < e; i++) {
-      Record *RegOpRec = RegInstr->Operands[i].Rec;
-      Record *MemOpRec = MemInstr->Operands[i].Rec;
-      if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)) {
-        switch (i) {
-        default: llvm_unreachable("Unexpected operand count!");
-        case 0:
-          addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
-          return;
-        case 1:
-          addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1);
-          return;
-        case 2:
-          addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2);
-          return;
-        case 3:
-          addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3);
-          return;
-        case 4:
-          addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4);
-          return;
-        }
-      }
-    }
-  } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {
-    // Store-Folding cases.
-    // If the memory form instruction performs performs a store, the *output*
-    // register of the register form instructions disappear and instead a
-    // memory *input* operand appears in the memory form instruction.
-    // For example:
-    //   MOVAPSrr => (outs VR128:$dst), (ins VR128:$src)
-    //   MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)
-    Record *RegOpRec = RegInstr->Operands[RegOutSize - 1].Rec;
-    Record *MemOpRec = MemInstr->Operands[RegOutSize - 1].Rec;
-    if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec))
-      addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
-  }
-
-  return;
-}
-
-void X86FoldTablesEmitter::run(raw_ostream &OS) {
-  emitSourceFileHeader("X86 fold tables", OS);
-
-  // Holds all memory instructions
-  std::vector<const CodeGenInstruction *> MemInsts;
-  // Holds all register instructions - divided according to opcode.
-  std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts;
-
-  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
-      Target.getInstructionsByEnumValue();
-
-  for (const CodeGenInstruction *Inst : NumberedInstructions) {
-    if (!Inst->TheDef->getNameInit() || !Inst->TheDef->isSubClassOf("X86Inst"))
-      continue;
-
-    const Record *Rec = Inst->TheDef;
-
-    // - Do not proceed matching if the instruction in NoFoldSet.
-    // - Instructions including RST register class operands are not relevant
-    //   for memory folding (for further details check the explanation in
-    //   lib/Target/X86/X86InstrFPStack.td file).
-    // - Some instructions (listed in the manual map above) use the register
-    //   class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure
-    //   safe mapping of these instruction we manually map them and exclude
-    //   them from the automation.
-    if (find(NoFoldSet, Rec->getName()) != std::end(NoFoldSet) ||
-        hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
-      continue;
-
-    // Add all the memory form instructions to MemInsts, and all the register
-    // form instructions to RegInsts[Opc], where Opc in the opcode of each
-    // instructions. this helps reducing the runtime of the backend.
-    if (hasMemoryFormat(Rec))
-      MemInsts.push_back(Inst);
-    else if (hasRegisterFormat(Rec)) {
-      uint8_t Opc = getValueFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
-      RegInsts[Opc].push_back(Inst);
-    }
-  }
-
-  // For each memory form instruction, try to find its register form
-  // instruction.
-  for (const CodeGenInstruction *MemInst : MemInsts) {
-    uint8_t Opc =
-        getValueFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));
-
-    if (RegInsts.count(Opc) == 0)
-      continue;
-
-    // Two forms (memory & register) of the same instruction must have the same
-    // opcode. try matching only with register form instructions with the same
-    // opcode.
-    std::vector<const CodeGenInstruction *> &OpcRegInsts =
-        RegInsts.find(Opc)->second;
-
-    auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Records));
-    if (Match != OpcRegInsts.end()) {
-      const CodeGenInstruction *RegInst = *Match;
-      // If the matched instruction has it's "FoldGenRegForm" set, map the
-      // memory form instruction to the register form instruction pointed by
-      // this field
-      if (RegInst->TheDef->isValueUnset("FoldGenRegForm")) {
-        updateTables(RegInst, MemInst);
-      } else {
-        const CodeGenInstruction *AltRegInst =
-            getAltRegInst(RegInst, Records, Target);
-        updateTables(AltRegInst, MemInst);
-      }
-      OpcRegInsts.erase(Match);
-    }
-  }
-
-  // Add the manually mapped instructions listed above.
-  for (const ManualMapEntry &Entry : ManualMapSet) {
-    Record *RegInstIter = Records.getDef(Entry.RegInstStr);
-    Record *MemInstIter = Records.getDef(Entry.MemInstStr);
-
-    updateTables(&(Target.getInstruction(RegInstIter)),
-                 &(Target.getInstruction(MemInstIter)), Entry.Strategy);
-  }
-
-  // Print all tables to raw_ostream OS.
-  printTable(Table2Addr, "Table2Addr", OS);
-  printTable(Table0, "Table0", OS);
-  printTable(Table1, "Table1", OS);
-  printTable(Table2, "Table2", OS);
-  printTable(Table3, "Table3", OS);
-  printTable(Table4, "Table4", OS);
-}
-
-namespace llvm {
-
-void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) {
-  X86FoldTablesEmitter(RK).run(OS);
-}
-} // namespace llvm
diff --git a/utils/gdb-scripts/prettyprinters.py b/utils/gdb-scripts/prettyprinters.py
index be21b7083f32..1a549f875d44 100644
--- a/utils/gdb-scripts/prettyprinters.py
+++ b/utils/gdb-scripts/prettyprinters.py
@@ -226,54 +226,62 @@ class TwinePrinter:
 
     return s
 
+  def is_twine_kind(self, kind, expected):
+    if not kind.endswith(expected):
+      return False
+    # Some GDB versions appear to qualify the kind with the NodeKind:: scope
+    # (observed with GDB 7.11), so both spellings are accepted below.
+    return kind in ('llvm::Twine::' + expected,
+                    'llvm::Twine::NodeKind::' + expected)
+
   def string_from_child(self, child, kind):
     '''Return the string representation of the Twine::Child child.'''
 
-    if kind in ('llvm::Twine::EmptyKind', 'llvm::Twine::NullKind'):
+    if self.is_twine_kind(kind, 'EmptyKind') or self.is_twine_kind(kind, 'NullKind'):
       return ''
 
-    if kind == 'llvm::Twine::TwineKind':
+    if self.is_twine_kind(kind, 'TwineKind'):
       return self.string_from_twine_object(child['twine'].dereference())
 
-    if kind == 'llvm::Twine::CStringKind':
+    if self.is_twine_kind(kind, 'CStringKind'):
       return child['cString'].string()
 
-    if kind == 'llvm::Twine::StdStringKind':
+    if self.is_twine_kind(kind, 'StdStringKind'):
       val = child['stdString'].dereference()
       return self.string_from_pretty_printer_lookup(val)
 
-    if kind == 'llvm::Twine::StringRefKind':
+    if self.is_twine_kind(kind, 'StringRefKind'):
       val = child['stringRef'].dereference()
       pp = StringRefPrinter(val)
       return pp.to_string()
 
-    if kind == 'llvm::Twine::SmallStringKind':
+    if self.is_twine_kind(kind, 'SmallStringKind'):
       val = child['smallString'].dereference()
       pp = SmallStringPrinter(val)
       return pp.to_string()
 
-    if kind == 'llvm::Twine::CharKind':
+    if self.is_twine_kind(kind, 'CharKind'):
       return chr(child['character'])
 
-    if kind == 'llvm::Twine::DecUIKind':
+    if self.is_twine_kind(kind, 'DecUIKind'):
       return str(child['decUI'])
 
-    if kind == 'llvm::Twine::DecIKind':
+    if self.is_twine_kind(kind, 'DecIKind'):
       return str(child['decI'])
 
-    if kind == 'llvm::Twine::DecULKind':
+    if self.is_twine_kind(kind, 'DecULKind'):
       return str(child['decUL'].dereference())
 
-    if kind == 'llvm::Twine::DecLKind':
+    if self.is_twine_kind(kind, 'DecLKind'):
       return str(child['decL'].dereference())
 
-    if kind == 'llvm::Twine::DecULLKind':
+    if self.is_twine_kind(kind, 'DecULLKind'):
       return str(child['decULL'].dereference())
 
-    if kind == 'llvm::Twine::DecLLKind':
+    if self.is_twine_kind(kind, 'DecLLKind'):
       return str(child['decLL'].dereference())
 
-    if kind == 'llvm::Twine::UHexKind':
+    if self.is_twine_kind(kind, 'UHexKind'):
       val = child['uHex'].dereference()
       return hex(int(val))
 
diff --git a/utils/git-svn/git-llvm b/utils/git-svn/git-llvm
index 975b8480601b..70b63f199494 100755
--- a/utils/git-svn/git-llvm
+++ b/utils/git-svn/git-llvm
@@ -36,14 +36,19 @@ GIT_TO_SVN_DIR = {
     for d in [
         'clang-tools-extra',
         'compiler-rt',
+        'debuginfo-tests',
         'dragonegg',
         'klee',
         'libclc',
         'libcxx',
         'libcxxabi',
+        'libunwind',
         'lld',
         'lldb',
+        'llgo',
         'llvm',
+        'openmp',
+        'parallel-libs',
         'polly',
     ]
 }
diff --git a/utils/opt-viewer/optrecord.py b/utils/opt-viewer/optrecord.py
index 3dc77e9db019..2f930a48a056 100644
--- a/utils/opt-viewer/optrecord.py
+++ b/utils/opt-viewer/optrecord.py
@@ -33,7 +33,7 @@ def html_file_name(filename):
     return filename.replace('/', '_') + ".html"
 
 def make_link(File, Line):
-    return "{}#L{}".format(html_file_name(File), Line)
+    return "\"{}#L{}\"".format(html_file_name(File), Line)
 
 
 class Remark(yaml.YAMLObject):
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index b597d5f45ad8..f5aa3ff0cd00 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -173,13 +173,6 @@ while [ $# -gt 0 ]; do
     shift
 done
 
-if [ "$do_test_suite" = "yes" ]; then
-  # See llvm.org/PR26146.
-  echo Skipping test-suite build when using CMake.
-  echo It will still be exported.
-  do_test_suite="export-only"
-fi
-
 # Check required arguments.
 if [ -z "$Release" ]; then
     echo "error: no release number specified"
@@ -315,11 +308,7 @@ function export_sources() {
             projsrc=llvm.src/projects/$proj
             ;;
         test-suite)
-            if [ $do_test_suite = 'yes' ]; then
-              projsrc=llvm.src/projects/$proj
-            else
-              projsrc=$proj.src
-            fi
+            projsrc=$proj.src
             ;;
         *)
             echo "error: unknown project $proj"
@@ -417,6 +406,22 @@ function test_llvmCore() {
       deferred_error $Phase $Flavor "check-all failed"
     fi
 
+    if [ $do_test_suite = 'yes' ]; then
+      SandboxDir="$BuildDir/sandbox"
+      Lit=$SandboxDir/bin/lit
+      TestSuiteBuildDir="$BuildDir/test-suite-build"
+      TestSuiteSrcDir="$BuildDir/test-suite.src"
+
+      virtualenv $SandboxDir
+      $SandboxDir/bin/python $BuildDir/llvm.src/utils/lit/setup.py install
+      mkdir -p $TestSuiteBuildDir
+      cd $TestSuiteBuildDir
+      cmake $TestSuiteSrcDir -DTEST_SUITE_LIT=$Lit
+      if ! ( ${MAKE} -j $NumJobs -k check \
+          2>&1 | tee $LogDir/llvm.check-Phase$Phase-$Flavor.log ) ; then
+        deferred_error $Phase $Flavor "test suite failed"
+      fi
+    fi
     cd $BuildDir
 }